diff --git a/sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java b/sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
index b99355808bb..e375bfd22a0 100644
--- a/sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
+++ b/sandbox/contributions/analyzers/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
@@ -61,9 +61,8 @@ import java.io.Reader;
 /**
- *
* CJKTokenizer was modified from StopTokenizer which does a decent job for
- * most European languages. and it perferm other token method for double-byte
+ * most European languages. It uses a different tokenization method for double-byte
- * Characters: the token will return at each two charactors with overlap match.
- * Example: "java C1C2C3C4" will be segment to: "java" "C1C2" "C2C3" "C3C4" it
- * also need filter filter zero length token ""
+ * characters: tokens are returned as overlapping pairs of adjacent characters.
+ * Example: "java C1C2C3C4" will be segmented into: "java" "C1C2" "C2C3" "C3C4".
+ * Zero-length tokens "" also need to be filtered out.
@@ -71,7 +70,6 @@ import java.io.Reader;
- * for more info on Asia language(Chinese Japanese Korean) text segmentation:
- * please search google
+ * For more information on Asian-language (Chinese, Japanese, Korean) text
+ * segmentation, please search Google.
- *
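
Below is a minimal usage sketch of the overlapping-bigram behavior the Javadoc describes. It assumes the contrib-era (pre-2.9) Lucene analysis API, where TokenStream.next() returns a Token and Token.termText() returns its text; the CJK string used here is only an illustrative stand-in for the C1..C4 placeholders in the comment, not something taken from the patch.

import java.io.StringReader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.cjk.CJKTokenizer;

public class CJKTokenizerDemo {
    public static void main(String[] args) throws Exception {
        // Single-byte text is kept as a whole word ("java"); the four CJK
        // characters are emitted as overlapping two-character tokens.
        CJKTokenizer tokenizer =
            new CJKTokenizer(new StringReader("java \u4E00\u4E8C\u4E09\u56DB"));
        for (Token t = tokenizer.next(); t != null; t = tokenizer.next()) {
            System.out.println(t.termText()); // java, 一二, 二三, 三四
        }
        tokenizer.close();
    }
}

The expected output mirrors the Javadoc example: the ASCII word as one token, followed by the overlapping bigrams of the CJK run.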