LUCENE-1320

ShingleMatrixFilter JDK downgrade 1.5 -> 1.4 
Grant Ingersoll via Karl Wettin

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@694393 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karl-Johan Wettin 2008-09-11 18:23:18 +00:00
parent d31fb4556c
commit 71f2d8199b
3 changed files with 81 additions and 73 deletions

View File

@ -23,8 +23,8 @@
Additional Analyzers
</description>
<property name="javac.source" value="1.5" />
<property name="javac.target" value="1.5" />
<property name="javac.source" value="1.4" />
<property name="javac.target" value="1.4" />
<import file="../contrib-build.xml"/>
</project>

View File

@ -108,7 +108,7 @@ import org.apache.lucene.index.Payload;
*/
public class ShingleMatrixFilter extends TokenStream {
public static Character defaultSpacerCharacter = '_';
public static Character defaultSpacerCharacter = new Character('_');
public static TokenSettingsCodec defaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
public static boolean ignoringSinglePrefixOrSuffixShingleByDefault = false;
@ -155,10 +155,10 @@ public class ShingleMatrixFilter extends TokenStream {
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#getTokenPositioner(org.apache.lucene.analysis.Token)
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#setTokenPositioner(org.apache.lucene.analysis.Token,org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenPositioner)
*/
public static enum TokenPositioner {
newColumn(0),
newRow(1),
sameRow(2);
public static class TokenPositioner {
public static final TokenPositioner newColumn = new TokenPositioner(0);
public static final TokenPositioner newRow = new TokenPositioner(1);
public static final TokenPositioner sameRow = new TokenPositioner(2);
private final int index;
@ -180,7 +180,7 @@ public class ShingleMatrixFilter extends TokenStream {
private boolean ignoringSinglePrefixOrSuffixShingle = false;
private Character spacerCharacter = '_';
private Character spacerCharacter = defaultSpacerCharacter;
private TokenStream input;
@ -279,12 +279,12 @@ public class ShingleMatrixFilter extends TokenStream {
// internal filter instance variables
/** iterator over the current matrix row permutations */
private Iterator<Matrix.Column.Row[]> permutations;
private Iterator permutations;
/** the current permutation of tokens used to produce shingles */
private List<Token> currentPermuationTokens;
private List currentPermuationTokens;
/** index to what row a token in currentShingleTokens represents*/
private List<Matrix.Column.Row> currentPermutationRows;
private List currentPermutationRows;
private int currentPermutationTokensStartOffset;
private int currentShingleLength;
@ -293,7 +293,7 @@ public class ShingleMatrixFilter extends TokenStream {
* a set containing shingles that has been the result of a call to next(Token),
* used to avoid producing the same shingle more than once.
*/
private Set<List<Token>> shinglesSeen = new HashSet<List<Token>>();
private Set shinglesSeen = new HashSet();
public void reset() throws IOException {
@ -324,16 +324,16 @@ public class ShingleMatrixFilter extends TokenStream {
if (ignoringSinglePrefixOrSuffixShingle
&& currentShingleLength == 1
&& (currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isFirst() || currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isLast())) {
&& (((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || ((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
return next(reusableToken);
}
int termLength = 0;
List<Token> shingle = new ArrayList<Token>();
List shingle = new ArrayList();
for (int i = 0; i < currentShingleLength; i++) {
Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
Token shingleToken = (Token) currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
termLength += shingleToken.termLength();
shingle.add(shingleToken);
}
@ -347,8 +347,9 @@ public class ShingleMatrixFilter extends TokenStream {
}
// shingle token factory
StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
for (Token shingleToken : shingle) {
StringBuffer sb = new StringBuffer(termLength + 10); // paranormal ability to foresee the future.
for (Iterator iterator = shingle.iterator(); iterator.hasNext();) {
Token shingleToken = (Token) iterator.next();
if (spacerCharacter != null && sb.length() > 0) {
sb.append(spacerCharacter);
}
@ -388,18 +389,22 @@ public class ShingleMatrixFilter extends TokenStream {
// get rith of resources
// delete the first column in the matrix
Matrix.Column deletedColumn = matrix.columns.remove(0);
Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
// remove all shingles seen that include any of the tokens from the deleted column.
List<Token> deletedColumnTokens = new ArrayList<Token>();
for (Matrix.Column.Row row : deletedColumn.getRows()) {
for (Token shingleToken : row.getTokens()) {
deletedColumnTokens.add(shingleToken);
List deletedColumnTokens = new ArrayList();
for (Iterator iterator = deletedColumn.getRows().iterator(); iterator.hasNext();) {
Matrix.Column.Row row = (Matrix.Column.Row) iterator.next();
for (Iterator rowIter = row.getTokens().iterator(); rowIter.hasNext();) {
Object o = rowIter.next();//Token
deletedColumnTokens.add(o);
}
}
for (Iterator<List<Token>> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
List<Token> shingle = shinglesSeenIterator.next();
for (Token deletedColumnToken : deletedColumnTokens) {
for (Iterator shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
List shingle = (List) shinglesSeenIterator.next();
for (Iterator deletedIter = deletedColumnTokens.iterator(); deletedIter.hasNext();) {
Token deletedColumnToken = (Token) deletedIter.next();
if (shingle.contains(deletedColumnToken)) {
shinglesSeenIterator.remove();
break;
@ -443,12 +448,14 @@ public class ShingleMatrixFilter extends TokenStream {
* finally resets the current (next) shingle size and offset.
*/
private void nextTokensPermutation() {
Matrix.Column.Row[] rowsPermutation = permutations.next();
List<Matrix.Column.Row> currentPermutationRows = new ArrayList<Matrix.Column.Row>();
List<Token> currentPermuationTokens = new ArrayList<Token>();
for (Matrix.Column.Row row : rowsPermutation) {
for (Token shingleToken : row.getTokens()) {
currentPermuationTokens.add(shingleToken);
Matrix.Column.Row[] rowsPermutation;
rowsPermutation = (Matrix.Column.Row[]) permutations.next();
List currentPermutationRows = new ArrayList();
List currentPermuationTokens = new ArrayList();
for (int i = 0; i < rowsPermutation.length; i++) {
Matrix.Column.Row row = rowsPermutation[i];
for (Iterator iterator = row.getTokens().iterator(); iterator.hasNext();) {
currentPermuationTokens.add(iterator.next());
currentPermutationRows.add(row);
}
}
@ -471,12 +478,12 @@ public class ShingleMatrixFilter extends TokenStream {
* @param currentPermutationRows index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens
* @param currentPermuationTokens tokens of the current permutation of rows in the matrix.
*/
public void updateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Matrix.Column.Row> currentPermutationRows, List<Token> currentPermuationTokens) {
token.setType(ShingleMatrixFilter.class.getSimpleName());
public void updateToken(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
token.setType(ShingleMatrixFilter.class.getName());
token.setFlags(0);
token.setPositionIncrement(1);
token.setStartOffset(shingle.get(0).startOffset());
token.setEndOffset(shingle.get(shingle.size() - 1).endOffset());
token.setStartOffset(((Token) shingle.get(0)).startOffset());
token.setEndOffset(((Token) shingle.get(shingle.size() - 1)).endOffset());
settingsCodec.setWeight(token, calculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens));
}
@ -496,7 +503,7 @@ public class ShingleMatrixFilter extends TokenStream {
* @param currentPermuationTokens all tokens in the current row permutation of the matrix. A sub list (parameter offset, parameter shingle.size) equals parameter shingle.
* @return weight to be set for parameter shingleToken
*/
public float calculateShingleWeight(Token shingleToken, List<Token> shingle, int currentPermutationStartOffset, List<Matrix.Column.Row> currentPermutationRows, List<Token> currentPermuationTokens) {
public float calculateShingleWeight(Token shingleToken, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
double[] weights = new double[shingle.size()];
double total = 0f;
@ -504,7 +511,7 @@ public class ShingleMatrixFilter extends TokenStream {
for (int i=0; i<weights.length; i++) {
weights[i] = settingsCodec.getWeight(shingle.get(i));
weights[i] = settingsCodec.getWeight((Token) shingle.get(i));
double tmp = weights[i];
if (tmp > top) {
@ -516,7 +523,8 @@ public class ShingleMatrixFilter extends TokenStream {
double factor = 1d / Math.sqrt(total);
double weight = 0d;
for (double partWeight : weights) {
for (int i = 0; i < weights.length; i++) {
double partWeight = weights[i];
weight += partWeight * factor;
}
@ -597,9 +605,9 @@ public class ShingleMatrixFilter extends TokenStream {
private boolean columnsHasBeenCreated = false;
private List<Column> columns = new ArrayList<Column>();
private List columns = new ArrayList();
public List<Column> getColumns() {
public List getColumns() {
return columns;
}
@ -628,9 +636,9 @@ public class ShingleMatrixFilter extends TokenStream {
Matrix.this.columns.add(this);
}
private List<Row> rows = new ArrayList<Row>();
private List rows = new ArrayList();
public List<Row> getRows() {
public List getRows() {
return rows;
}
@ -669,7 +677,7 @@ public class ShingleMatrixFilter extends TokenStream {
return Column.this;
}
private List<Token> tokens = new LinkedList<Token>();
private List tokens = new LinkedList();
public Row() {
Column.this.rows.add(this);
@ -679,11 +687,11 @@ public class ShingleMatrixFilter extends TokenStream {
return Column.this.rows.indexOf(this);
}
public List<Token> getTokens() {
public List getTokens() {
return tokens;
}
public void setTokens(List<Token> tokens) {
public void setTokens(List tokens) {
this.tokens = tokens;
}
@ -706,7 +714,7 @@ public class ShingleMatrixFilter extends TokenStream {
public String toString() {
return "Row{" +
"index=" + getIndex() +
", tokens=" + (tokens == null ? null : Arrays.asList(tokens)) +
", tokens=" + (tokens == null ? null : tokens) +
'}';
}
}
@ -714,9 +722,9 @@ public class ShingleMatrixFilter extends TokenStream {
}
public Iterator<Column.Row[]> permutationIterator() {
public Iterator permutationIterator() {
return new Iterator<Column.Row[]>() {
return new Iterator() {
private int[] columnRowCounters = new int[columns.size()];
@ -726,10 +734,10 @@ public class ShingleMatrixFilter extends TokenStream {
public boolean hasNext() {
int s = columnRowCounters.length;
return s != 0 && columnRowCounters[s - 1] < columns.get(s - 1).getRows().size();
return s != 0 && columnRowCounters[s - 1] < ((Column) columns.get(s - 1)).getRows().size();
}
public Column.Row[] next() {
public Object next() {
if (!hasNext()) {
throw new NoSuchElementException("no more elements");
}
@ -737,7 +745,7 @@ public class ShingleMatrixFilter extends TokenStream {
Column.Row[] rows = new Column.Row[columnRowCounters.length];
for (int i = 0; i < columnRowCounters.length; i++) {
rows[i] = columns.get(i).rows.get(columnRowCounters[i]);
rows[i] = (Matrix.Column.Row) ((Column) columns.get(i)).rows.get(columnRowCounters[i]);
}
incrementColumnRowCounters();
@ -747,7 +755,7 @@ public class ShingleMatrixFilter extends TokenStream {
private void incrementColumnRowCounters() {
for (int i = 0; i < columnRowCounters.length; i++) {
columnRowCounters[i]++;
if (columnRowCounters[i] == columns.get(i).rows.size() &&
if (columnRowCounters[i] == ((Column) columns.get(i)).rows.size() &&
i < columnRowCounters.length - 1) {
columnRowCounters[i] = 0;
} else {

View File

@ -42,15 +42,15 @@ public class TestShingleMatrixFilter extends TestCase {
TokenStream ts;
ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
assertNull(ts.next(new Token()));
TokenListStream tls;
LinkedList<Token> tokens;
LinkedList tokens;
// test a plain old token stream with synonyms translated to rows.
tokens = new LinkedList<Token>();
tokens = new LinkedList();
tokens.add(createToken("please", 0, 6));
tokens.add(createToken("divide", 7, 13));
tokens.add(createToken("this", 14, 18));
@ -62,7 +62,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams
ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
ts = new ShingleMatrixFilter(tls, 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
Token reusableToken = new Token();
@ -93,11 +93,11 @@ public class TestShingleMatrixFilter extends TestCase {
TokenStream ts;
TokenListStream tls;
LinkedList<Token> tokens;
LinkedList tokens;
// test a plain old token stream with synonyms tranlated to rows.
tokens = new LinkedList<Token>();
tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 0, 4));
tokens.add(tokenFactory("greetings", 0, 0, 4));
tokens.add(tokenFactory("world", 1, 5, 10));
@ -108,7 +108,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams
ts = new ShingleMatrixFilter(tls, 2, 2, '_', false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
final Token reusableToken = new Token();
assertNext(ts, reusableToken, "hello_world");
@ -138,7 +138,7 @@ public class TestShingleMatrixFilter extends TestCase {
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
tokens = new LinkedList<Token>();
tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
@ -152,7 +152,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams, position incrememnt, weight, start offset, end offset
ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);
ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false);
//
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@ -174,7 +174,7 @@ public class TestShingleMatrixFilter extends TestCase {
// test unlimited size and allow single boundary token as shingle
tls.reset();
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', false);
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), false);
//
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
@ -224,7 +224,7 @@ public class TestShingleMatrixFilter extends TestCase {
// test unlimited size but don't allow single boundary token as shingle
tls.reset();
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', true);
ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), true);
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
// token.clear();
@ -279,7 +279,7 @@ public class TestShingleMatrixFilter extends TestCase {
//
tokens = new LinkedList<Token>();
tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
@ -292,7 +292,7 @@ public class TestShingleMatrixFilter extends TestCase {
// 2-3 grams
ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);
ts = new ShingleMatrixFilter(tls, 2, 3, new Character('_'), false);
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@ -353,7 +353,7 @@ public class TestShingleMatrixFilter extends TestCase {
matrix.new Column(tokenFactory("the", 1));
matrix.new Column(tokenFactory("croud", 1));
TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, new Character('_'), true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@ -457,7 +457,7 @@ public class TestShingleMatrixFilter extends TestCase {
assertNotNull(nextToken);
assertEquals(text, nextToken.term());
assertEquals(positionIncrement, nextToken.getPositionIncrement());
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
return nextToken;
}
@ -466,7 +466,7 @@ public class TestShingleMatrixFilter extends TestCase {
assertNotNull(nextToken);
assertEquals(text, nextToken.term());
assertEquals(positionIncrement, nextToken.getPositionIncrement());
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
assertEquals(startOffset, nextToken.startOffset());
assertEquals(endOffset, nextToken.endOffset());
return nextToken;
@ -491,21 +491,21 @@ public class TestShingleMatrixFilter extends TestCase {
public static class TokenListStream extends TokenStream {
private Collection<Token> tokens;
private Collection tokens;
public TokenListStream(TokenStream ts) throws IOException {
tokens = new ArrayList<Token>();
tokens = new ArrayList();
final Token reusableToken = new Token();
for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
tokens.add((Token) nextToken.clone());
}
}
public TokenListStream(Collection<Token> tokens) {
public TokenListStream(Collection tokens) {
this.tokens = tokens;
}
private Iterator<Token> iterator;
private Iterator iterator;
public Token next(final Token reusableToken) throws IOException {
assert reusableToken != null;