mirror of https://github.com/apache/lucene.git
Fix some minor issues in TrieTokenizer and also add support for offsets (in the same way as KeywordTokenizer)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1060997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e731a4b64
commit
6a9f686f58
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.NumericTokenStream;
|
import org.apache.lucene.analysis.NumericTokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.schema.DateField;
|
import org.apache.solr.schema.DateField;
|
||||||
|
@ -57,6 +58,9 @@ final class TrieTokenizer extends Tokenizer {
|
||||||
protected final TrieTypes type;
|
protected final TrieTypes type;
|
||||||
protected final NumericTokenStream ts;
|
protected final NumericTokenStream ts;
|
||||||
|
|
||||||
|
protected final OffsetAttribute ofsAtt = addAttribute(OffsetAttribute.class);
|
||||||
|
protected int startOfs, endOfs;
|
||||||
|
|
||||||
static NumericTokenStream getNumericTokenStream(int precisionStep) {
|
static NumericTokenStream getNumericTokenStream(int precisionStep) {
|
||||||
return new NumericTokenStream(precisionStep);
|
return new NumericTokenStream(precisionStep);
|
||||||
}
|
}
|
||||||
|
@ -82,6 +86,8 @@ final class TrieTokenizer extends Tokenizer {
|
||||||
input = super.input;
|
input = super.input;
|
||||||
char[] buf = new char[32];
|
char[] buf = new char[32];
|
||||||
int len = input.read(buf);
|
int len = input.read(buf);
|
||||||
|
this.startOfs = correctOffset(0);
|
||||||
|
this.endOfs = correctOffset(len);
|
||||||
String v = new String(buf, 0, len);
|
String v = new String(buf, 0, len);
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case INTEGER:
|
case INTEGER:
|
||||||
|
@ -105,13 +111,32 @@ final class TrieTokenizer extends Tokenizer {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
|
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
super.close();
|
||||||
|
ts.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
return ts.incrementToken();
|
if (ts.incrementToken()) {
|
||||||
|
ofsAtt.setOffset(startOfs, endOfs);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
ts.end();
|
||||||
|
ofsAtt.setOffset(endOfs, endOfs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue