mirror of https://github.com/apache/lucene.git
LUCENE-1320
ShingleMatrixFilter JDK downgrade 1.5 -> 1.4 Grant Ingersoll via Karl Wettin git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@694393 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d31fb4556c
commit
71f2d8199b
|
@ -23,8 +23,8 @@
|
|||
Additional Analyzers
|
||||
</description>
|
||||
|
||||
<property name="javac.source" value="1.5" />
|
||||
<property name="javac.target" value="1.5" />
|
||||
|
||||
<property name="javac.source" value="1.4" />
|
||||
<property name="javac.target" value="1.4" />
|
||||
|
||||
<import file="../contrib-build.xml"/>
|
||||
</project>
|
||||
|
|
|
@ -108,7 +108,7 @@ import org.apache.lucene.index.Payload;
|
|||
*/
|
||||
public class ShingleMatrixFilter extends TokenStream {
|
||||
|
||||
public static Character defaultSpacerCharacter = '_';
|
||||
public static Character defaultSpacerCharacter = new Character('_');
|
||||
public static TokenSettingsCodec defaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
|
||||
public static boolean ignoringSinglePrefixOrSuffixShingleByDefault = false;
|
||||
|
||||
|
@ -155,10 +155,10 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#getTokenPositioner(org.apache.lucene.analysis.Token)
|
||||
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#setTokenPositioner(org.apache.lucene.analysis.Token,org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenPositioner)
|
||||
*/
|
||||
public static enum TokenPositioner {
|
||||
newColumn(0),
|
||||
newRow(1),
|
||||
sameRow(2);
|
||||
public static class TokenPositioner {
|
||||
public static final TokenPositioner newColumn = new TokenPositioner(0);
|
||||
public static final TokenPositioner newRow = new TokenPositioner(1);
|
||||
public static final TokenPositioner sameRow = new TokenPositioner(2);
|
||||
|
||||
private final int index;
|
||||
|
||||
|
@ -180,7 +180,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
|
||||
private boolean ignoringSinglePrefixOrSuffixShingle = false;
|
||||
|
||||
private Character spacerCharacter = '_';
|
||||
private Character spacerCharacter = defaultSpacerCharacter;
|
||||
|
||||
private TokenStream input;
|
||||
|
||||
|
@ -279,12 +279,12 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
// internal filter instance variables
|
||||
|
||||
/** iterator over the current matrix row permutations */
|
||||
private Iterator<Matrix.Column.Row[]> permutations;
|
||||
private Iterator permutations;
|
||||
|
||||
/** the current permutation of tokens used to produce shingles */
|
||||
private List<Token> currentPermuationTokens;
|
||||
private List currentPermuationTokens;
|
||||
/** index to what row a token in currentShingleTokens represents*/
|
||||
private List<Matrix.Column.Row> currentPermutationRows;
|
||||
private List currentPermutationRows;
|
||||
|
||||
private int currentPermutationTokensStartOffset;
|
||||
private int currentShingleLength;
|
||||
|
@ -293,7 +293,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* a set containing shingles that has been the result of a call to next(Token),
|
||||
* used to avoid producing the same shingle more than once.
|
||||
*/
|
||||
private Set<List<Token>> shinglesSeen = new HashSet<List<Token>>();
|
||||
private Set shinglesSeen = new HashSet();
|
||||
|
||||
|
||||
public void reset() throws IOException {
|
||||
|
@ -324,16 +324,16 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
|
||||
if (ignoringSinglePrefixOrSuffixShingle
|
||||
&& currentShingleLength == 1
|
||||
&& (currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isFirst() || currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isLast())) {
|
||||
&& (((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || ((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
|
||||
return next(reusableToken);
|
||||
}
|
||||
|
||||
int termLength = 0;
|
||||
|
||||
List<Token> shingle = new ArrayList<Token>();
|
||||
List shingle = new ArrayList();
|
||||
|
||||
for (int i = 0; i < currentShingleLength; i++) {
|
||||
Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
|
||||
Token shingleToken = (Token) currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
|
||||
termLength += shingleToken.termLength();
|
||||
shingle.add(shingleToken);
|
||||
}
|
||||
|
@ -347,8 +347,9 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
}
|
||||
|
||||
// shingle token factory
|
||||
StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
|
||||
for (Token shingleToken : shingle) {
|
||||
StringBuffer sb = new StringBuffer(termLength + 10); // paranormal ability to foresee the future.
|
||||
for (Iterator iterator = shingle.iterator(); iterator.hasNext();) {
|
||||
Token shingleToken = (Token) iterator.next();
|
||||
if (spacerCharacter != null && sb.length() > 0) {
|
||||
sb.append(spacerCharacter);
|
||||
}
|
||||
|
@ -388,18 +389,22 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
// get rith of resources
|
||||
|
||||
// delete the first column in the matrix
|
||||
Matrix.Column deletedColumn = matrix.columns.remove(0);
|
||||
Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
|
||||
|
||||
// remove all shingles seen that include any of the tokens from the deleted column.
|
||||
List<Token> deletedColumnTokens = new ArrayList<Token>();
|
||||
for (Matrix.Column.Row row : deletedColumn.getRows()) {
|
||||
for (Token shingleToken : row.getTokens()) {
|
||||
deletedColumnTokens.add(shingleToken);
|
||||
List deletedColumnTokens = new ArrayList();
|
||||
for (Iterator iterator = deletedColumn.getRows().iterator(); iterator.hasNext();) {
|
||||
Matrix.Column.Row row = (Matrix.Column.Row) iterator.next();
|
||||
for (Iterator rowIter = row.getTokens().iterator(); rowIter.hasNext();) {
|
||||
Object o = rowIter.next();//Token
|
||||
deletedColumnTokens.add(o);
|
||||
}
|
||||
|
||||
}
|
||||
for (Iterator<List<Token>> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
|
||||
List<Token> shingle = shinglesSeenIterator.next();
|
||||
for (Token deletedColumnToken : deletedColumnTokens) {
|
||||
for (Iterator shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
|
||||
List shingle = (List) shinglesSeenIterator.next();
|
||||
for (Iterator deletedIter = deletedColumnTokens.iterator(); deletedIter.hasNext();) {
|
||||
Token deletedColumnToken = (Token) deletedIter.next();
|
||||
if (shingle.contains(deletedColumnToken)) {
|
||||
shinglesSeenIterator.remove();
|
||||
break;
|
||||
|
@ -443,12 +448,14 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* finally resets the current (next) shingle size and offset.
|
||||
*/
|
||||
private void nextTokensPermutation() {
|
||||
Matrix.Column.Row[] rowsPermutation = permutations.next();
|
||||
List<Matrix.Column.Row> currentPermutationRows = new ArrayList<Matrix.Column.Row>();
|
||||
List<Token> currentPermuationTokens = new ArrayList<Token>();
|
||||
for (Matrix.Column.Row row : rowsPermutation) {
|
||||
for (Token shingleToken : row.getTokens()) {
|
||||
currentPermuationTokens.add(shingleToken);
|
||||
Matrix.Column.Row[] rowsPermutation;
|
||||
rowsPermutation = (Matrix.Column.Row[]) permutations.next();
|
||||
List currentPermutationRows = new ArrayList();
|
||||
List currentPermuationTokens = new ArrayList();
|
||||
for (int i = 0; i < rowsPermutation.length; i++) {
|
||||
Matrix.Column.Row row = rowsPermutation[i];
|
||||
for (Iterator iterator = row.getTokens().iterator(); iterator.hasNext();) {
|
||||
currentPermuationTokens.add(iterator.next());
|
||||
currentPermutationRows.add(row);
|
||||
}
|
||||
}
|
||||
|
@ -471,12 +478,12 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* @param currentPermutationRows index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens
|
||||
* @param currentPermuationTokens tokens of the current permutation of rows in the matrix.
|
||||
*/
|
||||
public void updateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Matrix.Column.Row> currentPermutationRows, List<Token> currentPermuationTokens) {
|
||||
token.setType(ShingleMatrixFilter.class.getSimpleName());
|
||||
public void updateToken(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
|
||||
token.setType(ShingleMatrixFilter.class.getName());
|
||||
token.setFlags(0);
|
||||
token.setPositionIncrement(1);
|
||||
token.setStartOffset(shingle.get(0).startOffset());
|
||||
token.setEndOffset(shingle.get(shingle.size() - 1).endOffset());
|
||||
token.setStartOffset(((Token) shingle.get(0)).startOffset());
|
||||
token.setEndOffset(((Token) shingle.get(shingle.size() - 1)).endOffset());
|
||||
settingsCodec.setWeight(token, calculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens));
|
||||
}
|
||||
|
||||
|
@ -496,7 +503,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* @param currentPermuationTokens all tokens in the current row permutation of the matrix. A sub list (parameter offset, parameter shingle.size) equals parameter shingle.
|
||||
* @return weight to be set for parameter shingleToken
|
||||
*/
|
||||
public float calculateShingleWeight(Token shingleToken, List<Token> shingle, int currentPermutationStartOffset, List<Matrix.Column.Row> currentPermutationRows, List<Token> currentPermuationTokens) {
|
||||
public float calculateShingleWeight(Token shingleToken, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
|
||||
double[] weights = new double[shingle.size()];
|
||||
|
||||
double total = 0f;
|
||||
|
@ -504,7 +511,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
|
||||
|
||||
for (int i=0; i<weights.length; i++) {
|
||||
weights[i] = settingsCodec.getWeight(shingle.get(i));
|
||||
weights[i] = settingsCodec.getWeight((Token) shingle.get(i));
|
||||
|
||||
double tmp = weights[i];
|
||||
if (tmp > top) {
|
||||
|
@ -516,7 +523,8 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
double factor = 1d / Math.sqrt(total);
|
||||
|
||||
double weight = 0d;
|
||||
for (double partWeight : weights) {
|
||||
for (int i = 0; i < weights.length; i++) {
|
||||
double partWeight = weights[i];
|
||||
weight += partWeight * factor;
|
||||
}
|
||||
|
||||
|
@ -597,9 +605,9 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
|
||||
private boolean columnsHasBeenCreated = false;
|
||||
|
||||
private List<Column> columns = new ArrayList<Column>();
|
||||
private List columns = new ArrayList();
|
||||
|
||||
public List<Column> getColumns() {
|
||||
public List getColumns() {
|
||||
return columns;
|
||||
}
|
||||
|
||||
|
@ -628,9 +636,9 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
Matrix.this.columns.add(this);
|
||||
}
|
||||
|
||||
private List<Row> rows = new ArrayList<Row>();
|
||||
private List rows = new ArrayList();
|
||||
|
||||
public List<Row> getRows() {
|
||||
public List getRows() {
|
||||
return rows;
|
||||
}
|
||||
|
||||
|
@ -669,7 +677,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
return Column.this;
|
||||
}
|
||||
|
||||
private List<Token> tokens = new LinkedList<Token>();
|
||||
private List tokens = new LinkedList();
|
||||
|
||||
public Row() {
|
||||
Column.this.rows.add(this);
|
||||
|
@ -679,11 +687,11 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
return Column.this.rows.indexOf(this);
|
||||
}
|
||||
|
||||
public List<Token> getTokens() {
|
||||
public List getTokens() {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
public void setTokens(List<Token> tokens) {
|
||||
public void setTokens(List tokens) {
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
|
@ -706,7 +714,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
public String toString() {
|
||||
return "Row{" +
|
||||
"index=" + getIndex() +
|
||||
", tokens=" + (tokens == null ? null : Arrays.asList(tokens)) +
|
||||
", tokens=" + (tokens == null ? null : tokens) +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
@ -714,9 +722,9 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
}
|
||||
|
||||
|
||||
public Iterator<Column.Row[]> permutationIterator() {
|
||||
public Iterator permutationIterator() {
|
||||
|
||||
return new Iterator<Column.Row[]>() {
|
||||
return new Iterator() {
|
||||
|
||||
private int[] columnRowCounters = new int[columns.size()];
|
||||
|
||||
|
@ -726,10 +734,10 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
|
||||
public boolean hasNext() {
|
||||
int s = columnRowCounters.length;
|
||||
return s != 0 && columnRowCounters[s - 1] < columns.get(s - 1).getRows().size();
|
||||
return s != 0 && columnRowCounters[s - 1] < ((Column) columns.get(s - 1)).getRows().size();
|
||||
}
|
||||
|
||||
public Column.Row[] next() {
|
||||
public Object next() {
|
||||
if (!hasNext()) {
|
||||
throw new NoSuchElementException("no more elements");
|
||||
}
|
||||
|
@ -737,7 +745,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
Column.Row[] rows = new Column.Row[columnRowCounters.length];
|
||||
|
||||
for (int i = 0; i < columnRowCounters.length; i++) {
|
||||
rows[i] = columns.get(i).rows.get(columnRowCounters[i]);
|
||||
rows[i] = (Matrix.Column.Row) ((Column) columns.get(i)).rows.get(columnRowCounters[i]);
|
||||
}
|
||||
incrementColumnRowCounters();
|
||||
|
||||
|
@ -747,7 +755,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
private void incrementColumnRowCounters() {
|
||||
for (int i = 0; i < columnRowCounters.length; i++) {
|
||||
columnRowCounters[i]++;
|
||||
if (columnRowCounters[i] == columns.get(i).rows.size() &&
|
||||
if (columnRowCounters[i] == ((Column) columns.get(i)).rows.size() &&
|
||||
i < columnRowCounters.length - 1) {
|
||||
columnRowCounters[i] = 0;
|
||||
} else {
|
||||
|
|
|
@ -42,15 +42,15 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
TokenStream ts;
|
||||
|
||||
ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
|
||||
ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
|
||||
assertNull(ts.next(new Token()));
|
||||
|
||||
TokenListStream tls;
|
||||
LinkedList<Token> tokens;
|
||||
LinkedList tokens;
|
||||
|
||||
// test a plain old token stream with synonyms translated to rows.
|
||||
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens = new LinkedList();
|
||||
tokens.add(createToken("please", 0, 6));
|
||||
tokens.add(createToken("divide", 7, 13));
|
||||
tokens.add(createToken("this", 14, 18));
|
||||
|
@ -62,7 +62,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
// bi-grams
|
||||
|
||||
ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
|
||||
ts = new ShingleMatrixFilter(tls, 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
|
||||
|
||||
Token reusableToken = new Token();
|
||||
|
||||
|
@ -93,11 +93,11 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
TokenStream ts;
|
||||
TokenListStream tls;
|
||||
LinkedList<Token> tokens;
|
||||
LinkedList tokens;
|
||||
|
||||
// test a plain old token stream with synonyms tranlated to rows.
|
||||
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens = new LinkedList();
|
||||
tokens.add(tokenFactory("hello", 1, 0, 4));
|
||||
tokens.add(tokenFactory("greetings", 0, 0, 4));
|
||||
tokens.add(tokenFactory("world", 1, 5, 10));
|
||||
|
@ -108,7 +108,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
// bi-grams
|
||||
|
||||
ts = new ShingleMatrixFilter(tls, 2, 2, '_', false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
|
||||
ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
|
||||
|
||||
final Token reusableToken = new Token();
|
||||
assertNext(ts, reusableToken, "hello_world");
|
||||
|
@ -138,7 +138,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
|
||||
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens = new LinkedList();
|
||||
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
|
||||
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
|
@ -152,7 +152,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
// bi-grams, position incrememnt, weight, start offset, end offset
|
||||
|
||||
ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);
|
||||
ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false);
|
||||
//
|
||||
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
|
||||
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
|
||||
|
@ -174,7 +174,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
// test unlimited size and allow single boundary token as shingle
|
||||
tls.reset();
|
||||
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', false);
|
||||
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), false);
|
||||
|
||||
//
|
||||
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
|
||||
|
@ -224,7 +224,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
// test unlimited size but don't allow single boundary token as shingle
|
||||
|
||||
tls.reset();
|
||||
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', true);
|
||||
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), true);
|
||||
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
|
||||
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
|
||||
// token.clear();
|
||||
|
@ -279,7 +279,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
//
|
||||
|
||||
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens = new LinkedList();
|
||||
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
|
||||
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
|
||||
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
|
||||
|
@ -292,7 +292,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
// 2-3 grams
|
||||
|
||||
ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);
|
||||
ts = new ShingleMatrixFilter(tls, 2, 3, new Character('_'), false);
|
||||
|
||||
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
|
||||
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
|
||||
|
@ -353,7 +353,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
matrix.new Column(tokenFactory("the", 1));
|
||||
matrix.new Column(tokenFactory("croud", 1));
|
||||
|
||||
TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
|
||||
TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, new Character('_'), true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
|
||||
|
||||
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
|
||||
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
|
||||
|
@ -457,7 +457,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
assertNotNull(nextToken);
|
||||
assertEquals(text, nextToken.term());
|
||||
assertEquals(positionIncrement, nextToken.getPositionIncrement());
|
||||
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
|
||||
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
|
||||
return nextToken;
|
||||
}
|
||||
|
||||
|
@ -466,7 +466,7 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
assertNotNull(nextToken);
|
||||
assertEquals(text, nextToken.term());
|
||||
assertEquals(positionIncrement, nextToken.getPositionIncrement());
|
||||
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
|
||||
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
|
||||
assertEquals(startOffset, nextToken.startOffset());
|
||||
assertEquals(endOffset, nextToken.endOffset());
|
||||
return nextToken;
|
||||
|
@ -491,21 +491,21 @@ public class TestShingleMatrixFilter extends TestCase {
|
|||
|
||||
public static class TokenListStream extends TokenStream {
|
||||
|
||||
private Collection<Token> tokens;
|
||||
private Collection tokens;
|
||||
|
||||
public TokenListStream(TokenStream ts) throws IOException {
|
||||
tokens = new ArrayList<Token>();
|
||||
tokens = new ArrayList();
|
||||
final Token reusableToken = new Token();
|
||||
for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
|
||||
tokens.add((Token) nextToken.clone());
|
||||
}
|
||||
}
|
||||
|
||||
public TokenListStream(Collection<Token> tokens) {
|
||||
public TokenListStream(Collection tokens) {
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
private Iterator<Token> iterator;
|
||||
private Iterator iterator;
|
||||
|
||||
public Token next(final Token reusableToken) throws IOException {
|
||||
assert reusableToken != null;
|
||||
|
|
Loading…
Reference in New Issue