LUCENE-2413: move high-level charfilters to contrib/analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940676 13f79535-47bb-0310-9956-ffa450edef68
2010-05-03 23:36:03 +00:00 · 2010-05-03 23:36:03 +00:00 · f9249e3a74
parent 5b74a4ec61
commit f9249e3a74
14 changed files with 26 additions and 464 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -11,6 +11,9 @@ Changes in backwards compatibility policy
  - o.a.l.analysis.LengthFilter -> o.a.l.analysis.miscellaneous.LengthFilter
  - o.a.l.analysis.PerFieldAnalyzerWrapper -> o.a.l.analysis.miscellaneous.PerFieldAnalyzerWrapper
  - o.a.l.analysis.TeeSinkTokenFilter -> o.a.l.analysis.sinks.TeeSinkTokenFilter
+  - o.a.l.analysis.BaseCharFilter -> o.a.l.analysis.charfilter.BaseCharFilter
+  - o.a.l.analysis.MappingCharFilter -> o.a.l.analysis.charfilter.MappingCharFilter
+  - o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
  ... (in progress)

 * LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing:
--- a/lucene/backwards/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
+++ b/lucene/backwards/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
@ -1,61 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Holds a map of String input to String output, to be used
- * with {@link MappingCharFilter}.
- */
-public class NormalizeCharMap {
-
-  Map<Character, NormalizeCharMap> submap;
-  String normStr;
-  int diff;
-
-  /** Records a replacement to be applied to the inputs
-   *  stream.  Whenever <code>singleMatch</code> occurs in
-   *  the input, it will be replaced with
-   *  <code>replacement</code>.
-   *
-   * @param singleMatch input String to be replaced
-   * @param replacement output String
-   */
-  public void add(String singleMatch, String replacement) {
-    NormalizeCharMap currMap = this;
-    for(int i = 0; i < singleMatch.length(); i++) {
-      char c = singleMatch.charAt(i);
-      if (currMap.submap == null) {
-        currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
-      }
-      NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
-      if (map == null) {
-        map = new NormalizeCharMap();
-        currMap.submap.put(Character.valueOf(c), map);
-      }
-      currMap = map;
-    }
-    if (currMap.normStr != null) {
-      throw new RuntimeException("MappingCharFilter: there is already a mapping for " + singleMatch);
-    }
-    currMap.normStr = replacement;
-    currMap.diff = singleMatch.length() - replacement.length();
-  }
-}
--- a/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
+++ b/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
@ -1,162 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.StringReader;
-import java.util.List;
-
-public class TestMappingCharFilter extends BaseTokenStreamTestCase {
-
-  NormalizeCharMap normMap;
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    normMap = new NormalizeCharMap();
-
-    normMap.add( "aa", "a" );
-    normMap.add( "bbb", "b" );
-    normMap.add( "cccc", "cc" );
-
-    normMap.add( "h", "i" );
-    normMap.add( "j", "jj" );
-    normMap.add( "k", "kkk" );
-    normMap.add( "ll", "llll" );
-
-    normMap.add( "empty", "" );
-  }
-
-  public void testReaderReset() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
-    char[] buf = new char[10];
-    int len = cs.read(buf, 0, 10);
-    assertEquals( 1, len );
-    assertEquals( 'x', buf[0]) ;
-    len = cs.read(buf, 0, 10);
-    assertEquals( -1, len );
-
-    // rewind
-    cs.reset();
-    len = cs.read(buf, 0, 10);
-    assertEquals( 1, len );
-    assertEquals( 'x', buf[0]) ;
-  }
-
-  public void testNothingChange() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
-  }
-
-  public void test1to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
-  }
-
-  public void test1to2() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
-  }
-
-  public void test1to3() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
-  }
-
-  public void test2to4() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
-  }
-
-  public void test2to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
-  }
-
-  public void test3to1() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
-  }
-
-  public void test4to2() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
-  }
-
-  public void test5to0() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts, new String[0]);
-  }
-
-  //
-  //                1111111111222
-  //      01234567890123456789012
-  //(in)  h i j k ll cccc bbb aa
-  //
-  //                1111111111222
-  //      01234567890123456789012
-  //(out) i i jj kkk llll cc b a
-  //
-  //    h, 0, 1 =>    i, 0, 1
-  //    i, 2, 3 =>    i, 2, 3
-  //    j, 4, 5 =>   jj, 4, 5
-  //    k, 6, 7 =>  kkk, 6, 7
-  //   ll, 8,10 => llll, 8,10
-  // cccc,11,15 =>   cc,11,15
-  //  bbb,16,19 =>    b,16,19
-  //   aa,20,22 =>    a,20,22
-  //
-  public void testTokenStream() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts,
-      new String[]{"i","i","jj","kkk","llll","cc","b","a"},
-      new int[]{0,2,4,6,8,11,16,20},
-      new int[]{1,3,5,7,10,15,19,22}
-    );
-  }
-
-  //
-  //
-  //        0123456789
-  //(in)    aaaa ll h
-  //(out-1) aa llll i
-  //(out-2) a llllllll i
-  //
-  // aaaa,0,4 => a,0,4
-  //   ll,5,7 => llllllll,5,7
-  //    h,8,9 => i,8,9
-  public void testChained() throws Exception {
-    CharStream cs = new MappingCharFilter( normMap,
-        new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
-    TokenStream ts = new WhitespaceTokenizer( cs );
-    assertTokenStreamContents(ts,
-      new String[]{"a","llllllll","i"},
-      new int[]{0,5,8},
-      new int[]{4,7,9}
-    );
-  }
-}
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java
@ -15,11 +15,14 @@
 * limitations under the License.
 */

-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.charfilter;

 import java.util.ArrayList;
 import java.util.List;

+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.CharStream;
+
 /**
 * Base utility class for implementing a {@link CharFilter}.
 * You subclass this, and then record mappings by calling
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
@ -15,12 +15,15 @@
 * limitations under the License.
 */

-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.charfilter;

 import java.io.IOException;
 import java.io.Reader;
 import java.util.LinkedList;

+import org.apache.lucene.analysis.CharReader;
+import org.apache.lucene.analysis.CharStream;
+
 /**
 * Simplistic {@link CharFilter} that applies the mappings
 * contained in a {@link NormalizeCharMap} to the character
--- a/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
+++ b/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
@ -15,7 +15,7 @@
 * limitations under the License.
 */

-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.charfilter;

 import java.util.HashMap;
 import java.util.Map;
--- a/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
+++ b/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
@ -15,10 +15,16 @@
 * limitations under the License.
 */

-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.charfilter;

 import java.io.StringReader;

+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharReader;
+import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
 public class TestMappingCharFilter extends BaseTokenStreamTestCase {

  NormalizeCharMap normMap;
--- a/lucene/src/java/org/apache/lucene/analysis/BaseCharFilter.java
+++ b/lucene/src/java/org/apache/lucene/analysis/BaseCharFilter.java
@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Base utility class for implementing a {@link CharFilter}.
- * You subclass this, and then record mappings by calling
- * {@link #addOffCorrectMap}, and then invoke the correct
- * method to correct an offset.
- *
- * <p><b>NOTE</b>: This class is not particularly efficient.
- * For example, a new class instance is created for every
- * call to {@link #addOffCorrectMap}, which is then appended
- * to a private list.
- */
-public abstract class BaseCharFilter extends CharFilter {
-
-  private List<OffCorrectMap> pcmList;
-  
-  public BaseCharFilter(CharStream in) {
-    super(in);
-  }
-
-  /** Retrieve the corrected offset.  Note that this method
-   *  is slow, if you correct positions far before the most
-   *  recently added position, as it's a simple linear
-   *  search backwards through all offset corrections added
-   *  by {@link #addOffCorrectMap}. */
-  @Override
-  protected int correct(int currentOff) {
-    if (pcmList == null || pcmList.isEmpty()) {
-      return currentOff;
-    }
-    for (int i = pcmList.size() - 1; i >= 0; i--) {
-      if (currentOff >=  pcmList.get(i).off) {
-        return currentOff + pcmList.get(i).cumulativeDiff;
-      }
-    }
-    return currentOff;
-  }
-  
-  protected int getLastCumulativeDiff() {
-    return pcmList == null || pcmList.isEmpty() ?
-      0 : pcmList.get(pcmList.size() - 1).cumulativeDiff;
-  }
-
-  protected void addOffCorrectMap(int off, int cumulativeDiff) {
-    if (pcmList == null) {
-      pcmList = new ArrayList<OffCorrectMap>();
-    }
-    pcmList.add(new OffCorrectMap(off, cumulativeDiff));
-  }
-
-  static class OffCorrectMap {
-
-    int off;
-    int cumulativeDiff;
-
-    OffCorrectMap(int off, int cumulativeDiff) {
-      this.off = off;
-      this.cumulativeDiff = cumulativeDiff;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder sb = new StringBuilder();
-      sb.append('(');
-      sb.append(off);
-      sb.append(',');
-      sb.append(cumulativeDiff);
-      sb.append(')');
-      return sb.toString();
-    }
-  }
-}
--- a/lucene/src/java/org/apache/lucene/analysis/MappingCharFilter.java
+++ b/lucene/src/java/org/apache/lucene/analysis/MappingCharFilter.java
@ -1,137 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.LinkedList;
-
-/**
- * Simplistic {@link CharFilter} that applies the mappings
- * contained in a {@link NormalizeCharMap} to the character
- * stream, and correcting the resulting changes to the
- * offsets.
- */
-public class MappingCharFilter extends BaseCharFilter {
-
-  private final NormalizeCharMap normMap;
-  private LinkedList<Character> buffer;
-  private String replacement;
-  private int charPointer;
-  private int nextCharCounter;
-
-  /** Default constructor that takes a {@link CharStream}. */
-  public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
-    super(in);
-    this.normMap = normMap;
-  }
-
-  /** Easy-use constructor that takes a {@link Reader}. */
-  public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
-    super(CharReader.get(in));
-    this.normMap = normMap;
-  }
-
-  @Override
-  public int read() throws IOException {
-    while(true) {
-      if (replacement != null && charPointer < replacement.length()) {
-        return replacement.charAt(charPointer++);
-      }
-
-      int firstChar = nextChar();
-      if (firstChar == -1) return -1;
-      NormalizeCharMap nm = normMap.submap != null ?
-        normMap.submap.get(Character.valueOf((char) firstChar)) : null;
-      if (nm == null) return firstChar;
-      NormalizeCharMap result = match(nm);
-      if (result == null) return firstChar;
-      replacement = result.normStr;
-      charPointer = 0;
-      if (result.diff != 0) {
-        int prevCumulativeDiff = getLastCumulativeDiff();
-        if (result.diff < 0) {
-          for(int i = 0; i < -result.diff ; i++)
-            addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
-        } else {
-          addOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
-        }
-      }
-    }
-  }
-
-  private int nextChar() throws IOException {
-    nextCharCounter++;
-    if (buffer != null && !buffer.isEmpty()) {
-      return buffer.removeFirst().charValue();
-    }
-    return input.read();
-  }
-
-  private void pushChar(int c) {
-    nextCharCounter--;
-    if(buffer == null)
-      buffer = new LinkedList<Character>();
-    buffer.addFirst(Character.valueOf((char) c));
-  }
-
-  private void pushLastChar(int c) {
-    if (buffer == null) {
-      buffer = new LinkedList<Character>();
-    }
-    buffer.addLast(Character.valueOf((char) c));
-  }
-
-  private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
-    NormalizeCharMap result = null;
-    if (map.submap != null) {
-      int chr = nextChar();
-      if (chr != -1) {
-        NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
-        if (subMap != null) {
-          result = match(subMap);
-        }
-        if (result == null) {
-          pushChar(chr);
-        }
-      }
-    }
-    if (result == null && map.normStr != null) {
-      result = map;
-    }
-    return result;
-  }
-
-  @Override
-  public int read(char[] cbuf, int off, int len) throws IOException {
-    char[] tmp = new char[len];
-    int l = input.read(tmp, 0, len);
-    if (l != -1) {
-      for(int i = 0; i < l; i++)
-        pushLastChar(tmp[i]);
-    }
-    l = 0;
-    for(int i = off; i < off + len; i++) {
-      int c = read();
-      if (c == -1) break;
-      cbuf[i] = (char) c;
-      l++;
-    }
-    return l == 0 ? -1 : l;
-  }
-}
--- a/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/HTMLStripCharFilter.java
@ -24,7 +24,7 @@ import java.io.Reader;
 import java.util.HashMap;
 import java.util.Set;

-import org.apache.lucene.analysis.BaseCharFilter;
+import org.apache.lucene.analysis.charfilter.BaseCharFilter;
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;

--- a/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
@ -25,8 +25,8 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.MappingCharFilter;
-import org.apache.lucene.analysis.NormalizeCharMap;
+import org.apache.lucene.analysis.charfilter.MappingCharFilter;
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
--- a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilter.java
@ -22,7 +22,7 @@ import java.util.LinkedList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

-import org.apache.lucene.analysis.BaseCharFilter;
+import org.apache.lucene.analysis.charfilter.BaseCharFilter;
 import org.apache.lucene.analysis.CharStream;

 /**
--- a/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
@ -26,8 +26,8 @@ import java.util.Map;

 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.MappingCharFilter;
-import org.apache.lucene.analysis.NormalizeCharMap;
+import org.apache.lucene.analysis.charfilter.MappingCharFilter;
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

--- a/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
+++ b/solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
@ -312,7 +312,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
    assertNotNull("expecting an index token analysis for field type 'charfilthtmlmap'", indexPart);
    
    assertEquals("  whátëvêr  ", indexPart.get("org.apache.solr.analysis.HTMLStripCharFilter"));
-    assertEquals("  whatever  ", indexPart.get("org.apache.lucene.analysis.MappingCharFilter"));
+    assertEquals("  whatever  ", indexPart.get("org.apache.lucene.analysis.charfilter.MappingCharFilter"));

    List<NamedList> tokenList = (List<NamedList>)indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
    assertNotNull("Expecting WhitespaceTokenizer analysis breakdown", tokenList);