diff --git a/contrib/analyzers/build.xml b/contrib/analyzers/build.xml
index caf33d9e35d..b27cfc8b155 100644
--- a/contrib/analyzers/build.xml
+++ b/contrib/analyzers/build.xml
@@ -23,8 +23,8 @@
Additional Analyzers
-
-
-
+
+
+
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
index fc33e175819..c4ea8d3bdf4 100644
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
@@ -108,7 +108,7 @@ import org.apache.lucene.index.Payload;
*/
public class ShingleMatrixFilter extends TokenStream {
- public static Character defaultSpacerCharacter = '_';
+ public static Character defaultSpacerCharacter = new Character('_');
public static TokenSettingsCodec defaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
public static boolean ignoringSinglePrefixOrSuffixShingleByDefault = false;
@@ -155,10 +155,10 @@ public class ShingleMatrixFilter extends TokenStream {
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#getTokenPositioner(org.apache.lucene.analysis.Token)
* @see org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenSettingsCodec#setTokenPositioner(org.apache.lucene.analysis.Token,org.apache.lucene.analysis.shingle.ShingleMatrixFilter.TokenPositioner)
*/
- public static enum TokenPositioner {
- newColumn(0),
- newRow(1),
- sameRow(2);
+ public static class TokenPositioner {
+ public static final TokenPositioner newColumn = new TokenPositioner(0);
+ public static final TokenPositioner newRow = new TokenPositioner(1);
+ public static final TokenPositioner sameRow = new TokenPositioner(2);
private final int index;
@@ -180,7 +180,7 @@ public class ShingleMatrixFilter extends TokenStream {
private boolean ignoringSinglePrefixOrSuffixShingle = false;
- private Character spacerCharacter = '_';
+ private Character spacerCharacter = defaultSpacerCharacter;
private TokenStream input;
@@ -279,12 +279,12 @@ public class ShingleMatrixFilter extends TokenStream {
// internal filter instance variables
/** iterator over the current matrix row permutations */
- private Iterator permutations;
+ private Iterator permutations;
/** the current permutation of tokens used to produce shingles */
- private List currentPermuationTokens;
+ private List currentPermuationTokens;
/** index to what row a token in currentShingleTokens represents*/
- private List currentPermutationRows;
+ private List currentPermutationRows;
private int currentPermutationTokensStartOffset;
private int currentShingleLength;
@@ -293,7 +293,7 @@ public class ShingleMatrixFilter extends TokenStream {
* a set containing shingles that has been the result of a call to next(Token),
* used to avoid producing the same shingle more than once.
*/
- private Set> shinglesSeen = new HashSet>();
+ private Set shinglesSeen = new HashSet();
public void reset() throws IOException {
@@ -324,16 +324,16 @@ public class ShingleMatrixFilter extends TokenStream {
if (ignoringSinglePrefixOrSuffixShingle
&& currentShingleLength == 1
- && (currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isFirst() || currentPermutationRows.get(currentPermutationTokensStartOffset).getColumn().isLast())) {
+ && (((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || ((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
return next(reusableToken);
}
int termLength = 0;
- List shingle = new ArrayList();
+ List shingle = new ArrayList();
for (int i = 0; i < currentShingleLength; i++) {
- Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
+ Token shingleToken = (Token) currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
termLength += shingleToken.termLength();
shingle.add(shingleToken);
}
@@ -347,8 +347,9 @@ public class ShingleMatrixFilter extends TokenStream {
}
// shingle token factory
- StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
- for (Token shingleToken : shingle) {
+ StringBuffer sb = new StringBuffer(termLength + 10); // paranormal ability to foresee the future.
+ for (Iterator iterator = shingle.iterator(); iterator.hasNext();) {
+ Token shingleToken = (Token) iterator.next();
if (spacerCharacter != null && sb.length() > 0) {
sb.append(spacerCharacter);
}
@@ -388,18 +389,22 @@ public class ShingleMatrixFilter extends TokenStream {
// get rith of resources
// delete the first column in the matrix
- Matrix.Column deletedColumn = matrix.columns.remove(0);
+ Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
// remove all shingles seen that include any of the tokens from the deleted column.
- List deletedColumnTokens = new ArrayList();
- for (Matrix.Column.Row row : deletedColumn.getRows()) {
- for (Token shingleToken : row.getTokens()) {
- deletedColumnTokens.add(shingleToken);
+ List deletedColumnTokens = new ArrayList();
+ for (Iterator iterator = deletedColumn.getRows().iterator(); iterator.hasNext();) {
+ Matrix.Column.Row row = (Matrix.Column.Row) iterator.next();
+ for (Iterator rowIter = row.getTokens().iterator(); rowIter.hasNext();) {
+ Object o = rowIter.next();//Token
+ deletedColumnTokens.add(o);
}
+
}
- for (Iterator> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
- List shingle = shinglesSeenIterator.next();
- for (Token deletedColumnToken : deletedColumnTokens) {
+ for (Iterator shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
+ List shingle = (List) shinglesSeenIterator.next();
+ for (Iterator deletedIter = deletedColumnTokens.iterator(); deletedIter.hasNext();) {
+ Token deletedColumnToken = (Token) deletedIter.next();
if (shingle.contains(deletedColumnToken)) {
shinglesSeenIterator.remove();
break;
@@ -443,12 +448,14 @@ public class ShingleMatrixFilter extends TokenStream {
* finally resets the current (next) shingle size and offset.
*/
private void nextTokensPermutation() {
- Matrix.Column.Row[] rowsPermutation = permutations.next();
- List currentPermutationRows = new ArrayList();
- List currentPermuationTokens = new ArrayList();
- for (Matrix.Column.Row row : rowsPermutation) {
- for (Token shingleToken : row.getTokens()) {
- currentPermuationTokens.add(shingleToken);
+ Matrix.Column.Row[] rowsPermutation;
+ rowsPermutation = (Matrix.Column.Row[]) permutations.next();
+ List currentPermutationRows = new ArrayList();
+ List currentPermuationTokens = new ArrayList();
+ for (int i = 0; i < rowsPermutation.length; i++) {
+ Matrix.Column.Row row = rowsPermutation[i];
+ for (Iterator iterator = row.getTokens().iterator(); iterator.hasNext();) {
+ currentPermuationTokens.add(iterator.next());
currentPermutationRows.add(row);
}
}
@@ -471,12 +478,12 @@ public class ShingleMatrixFilter extends TokenStream {
* @param currentPermutationRows index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens
* @param currentPermuationTokens tokens of the current permutation of rows in the matrix.
*/
- public void updateToken(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
- token.setType(ShingleMatrixFilter.class.getSimpleName());
+ public void updateToken(Token token, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
+ token.setType(ShingleMatrixFilter.class.getName());
token.setFlags(0);
token.setPositionIncrement(1);
- token.setStartOffset(shingle.get(0).startOffset());
- token.setEndOffset(shingle.get(shingle.size() - 1).endOffset());
+ token.setStartOffset(((Token) shingle.get(0)).startOffset());
+ token.setEndOffset(((Token) shingle.get(shingle.size() - 1)).endOffset());
settingsCodec.setWeight(token, calculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens));
}
@@ -496,7 +503,7 @@ public class ShingleMatrixFilter extends TokenStream {
* @param currentPermuationTokens all tokens in the current row permutation of the matrix. A sub list (parameter offset, parameter shingle.size) equals parameter shingle.
* @return weight to be set for parameter shingleToken
*/
- public float calculateShingleWeight(Token shingleToken, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
+ public float calculateShingleWeight(Token shingleToken, List shingle, int currentPermutationStartOffset, List currentPermutationRows, List currentPermuationTokens) {
double[] weights = new double[shingle.size()];
double total = 0f;
@@ -504,7 +511,7 @@ public class ShingleMatrixFilter extends TokenStream {
for (int i=0; i top) {
@@ -516,7 +523,8 @@ public class ShingleMatrixFilter extends TokenStream {
double factor = 1d / Math.sqrt(total);
double weight = 0d;
- for (double partWeight : weights) {
+ for (int i = 0; i < weights.length; i++) {
+ double partWeight = weights[i];
weight += partWeight * factor;
}
@@ -597,9 +605,9 @@ public class ShingleMatrixFilter extends TokenStream {
private boolean columnsHasBeenCreated = false;
- private List columns = new ArrayList();
+ private List columns = new ArrayList();
- public List getColumns() {
+ public List getColumns() {
return columns;
}
@@ -628,9 +636,9 @@ public class ShingleMatrixFilter extends TokenStream {
Matrix.this.columns.add(this);
}
- private List rows = new ArrayList();
+ private List rows = new ArrayList();
- public List getRows() {
+ public List getRows() {
return rows;
}
@@ -669,7 +677,7 @@ public class ShingleMatrixFilter extends TokenStream {
return Column.this;
}
- private List tokens = new LinkedList();
+ private List tokens = new LinkedList();
public Row() {
Column.this.rows.add(this);
@@ -679,11 +687,11 @@ public class ShingleMatrixFilter extends TokenStream {
return Column.this.rows.indexOf(this);
}
- public List getTokens() {
+ public List getTokens() {
return tokens;
}
- public void setTokens(List tokens) {
+ public void setTokens(List tokens) {
this.tokens = tokens;
}
@@ -706,7 +714,7 @@ public class ShingleMatrixFilter extends TokenStream {
public String toString() {
return "Row{" +
"index=" + getIndex() +
- ", tokens=" + (tokens == null ? null : Arrays.asList(tokens)) +
+ ", tokens=" + (tokens == null ? null : tokens) +
'}';
}
}
@@ -714,9 +722,9 @@ public class ShingleMatrixFilter extends TokenStream {
}
- public Iterator permutationIterator() {
+ public Iterator permutationIterator() {
- return new Iterator() {
+ return new Iterator() {
private int[] columnRowCounters = new int[columns.size()];
@@ -726,10 +734,10 @@ public class ShingleMatrixFilter extends TokenStream {
public boolean hasNext() {
int s = columnRowCounters.length;
- return s != 0 && columnRowCounters[s - 1] < columns.get(s - 1).getRows().size();
+ return s != 0 && columnRowCounters[s - 1] < ((Column) columns.get(s - 1)).getRows().size();
}
- public Column.Row[] next() {
+ public Object next() {
if (!hasNext()) {
throw new NoSuchElementException("no more elements");
}
@@ -737,7 +745,7 @@ public class ShingleMatrixFilter extends TokenStream {
Column.Row[] rows = new Column.Row[columnRowCounters.length];
for (int i = 0; i < columnRowCounters.length; i++) {
- rows[i] = columns.get(i).rows.get(columnRowCounters[i]);
+ rows[i] = (Matrix.Column.Row) ((Column) columns.get(i)).rows.get(columnRowCounters[i]);
}
incrementColumnRowCounters();
@@ -747,7 +755,7 @@ public class ShingleMatrixFilter extends TokenStream {
private void incrementColumnRowCounters() {
for (int i = 0; i < columnRowCounters.length; i++) {
columnRowCounters[i]++;
- if (columnRowCounters[i] == columns.get(i).rows.size() &&
+ if (columnRowCounters[i] == ((Column) columns.get(i)).rows.size() &&
i < columnRowCounters.length - 1) {
columnRowCounters[i] = 0;
} else {
diff --git a/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java b/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
index ee7547fc500..b5d7549fd70 100644
--- a/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
+++ b/contrib/analyzers/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
@@ -42,15 +42,15 @@ public class TestShingleMatrixFilter extends TestCase {
TokenStream ts;
- ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
+ ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
assertNull(ts.next(new Token()));
TokenListStream tls;
- LinkedList tokens;
+ LinkedList tokens;
// test a plain old token stream with synonyms translated to rows.
- tokens = new LinkedList();
+ tokens = new LinkedList();
tokens.add(createToken("please", 0, 6));
tokens.add(createToken("divide", 7, 13));
tokens.add(createToken("this", 14, 18));
@@ -62,7 +62,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams
- ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
+ ts = new ShingleMatrixFilter(tls, 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec());
Token reusableToken = new Token();
@@ -93,11 +93,11 @@ public class TestShingleMatrixFilter extends TestCase {
TokenStream ts;
TokenListStream tls;
- LinkedList tokens;
+ LinkedList tokens;
// test a plain old token stream with synonyms tranlated to rows.
- tokens = new LinkedList();
+ tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 0, 4));
tokens.add(tokenFactory("greetings", 0, 0, 4));
tokens.add(tokenFactory("world", 1, 5, 10));
@@ -108,7 +108,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams
- ts = new ShingleMatrixFilter(tls, 2, 2, '_', false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
+ ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
final Token reusableToken = new Token();
assertNext(ts, reusableToken, "hello_world");
@@ -138,7 +138,7 @@ public class TestShingleMatrixFilter extends TestCase {
ShingleMatrixFilter.defaultSettingsCodec = new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
- tokens = new LinkedList();
+ tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 0, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("world", 1, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newColumn));
@@ -152,7 +152,7 @@ public class TestShingleMatrixFilter extends TestCase {
// bi-grams, position incrememnt, weight, start offset, end offset
- ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);
+ ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false);
//
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@@ -174,7 +174,7 @@ public class TestShingleMatrixFilter extends TestCase {
// test unlimited size and allow single boundary token as shingle
tls.reset();
- ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', false);
+ ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), false);
//
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
@@ -224,7 +224,7 @@ public class TestShingleMatrixFilter extends TestCase {
// test unlimited size but don't allow single boundary token as shingle
tls.reset();
- ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, '_', true);
+ ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), true);
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
// token.clear();
@@ -279,7 +279,7 @@ public class TestShingleMatrixFilter extends TestCase {
//
- tokens = new LinkedList();
+ tokens = new LinkedList();
tokens.add(tokenFactory("hello", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newColumn));
tokens.add(tokenFactory("greetings", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.newRow));
tokens.add(tokenFactory("and", 1, 1f, 0, 4, ShingleMatrixFilter.TokenPositioner.sameRow));
@@ -292,7 +292,7 @@ public class TestShingleMatrixFilter extends TestCase {
// 2-3 grams
- ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);
+ ts = new ShingleMatrixFilter(tls, 2, 3, new Character('_'), false);
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@@ -353,7 +353,7 @@ public class TestShingleMatrixFilter extends TestCase {
matrix.new Column(tokenFactory("the", 1));
matrix.new Column(tokenFactory("croud", 1));
- TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
+ TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, new Character('_'), true, new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec());
// for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
// System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
@@ -457,7 +457,7 @@ public class TestShingleMatrixFilter extends TestCase {
assertNotNull(nextToken);
assertEquals(text, nextToken.term());
assertEquals(positionIncrement, nextToken.getPositionIncrement());
- assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
+ assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
return nextToken;
}
@@ -466,7 +466,7 @@ public class TestShingleMatrixFilter extends TestCase {
assertNotNull(nextToken);
assertEquals(text, nextToken.term());
assertEquals(positionIncrement, nextToken.getPositionIncrement());
- assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()));
+ assertEquals(boost, nextToken.getPayload() == null ? 1f : PayloadHelper.decodeFloat(nextToken.getPayload().getData()), 0);
assertEquals(startOffset, nextToken.startOffset());
assertEquals(endOffset, nextToken.endOffset());
return nextToken;
@@ -491,21 +491,21 @@ public class TestShingleMatrixFilter extends TestCase {
public static class TokenListStream extends TokenStream {
- private Collection tokens;
+ private Collection tokens;
public TokenListStream(TokenStream ts) throws IOException {
- tokens = new ArrayList();
+ tokens = new ArrayList();
final Token reusableToken = new Token();
for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
tokens.add((Token) nextToken.clone());
}
}
- public TokenListStream(Collection tokens) {
+ public TokenListStream(Collection tokens) {
this.tokens = tokens;
}
- private Iterator iterator;
+ private Iterator iterator;
public Token next(final Token reusableToken) throws IOException {
assert reusableToken != null;