From 2c1dc3063996e374c224c737a21c72a52a91c545 Mon Sep 17 00:00:00 2001 From: Daniel Naber Date: Sun, 7 Nov 2004 23:41:50 +0000 Subject: [PATCH] indent the same everywhere, no functional change git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150630 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/search/FuzzyTermEnum.java | 361 +++++++++--------- 1 file changed, 179 insertions(+), 182 deletions(-) diff --git a/src/java/org/apache/lucene/search/FuzzyTermEnum.java b/src/java/org/apache/lucene/search/FuzzyTermEnum.java index 71908c020d0..2f1402d9d88 100644 --- a/src/java/org/apache/lucene/search/FuzzyTermEnum.java +++ b/src/java/org/apache/lucene/search/FuzzyTermEnum.java @@ -29,127 +29,127 @@ import java.io.IOException; */ public final class FuzzyTermEnum extends FilteredTermEnum { - /* This should be somewhere around the average long word. - * If it is longer, we waste time and space. If it is shorter, we waste a - * little bit of time growing the array as we encounter longer words. - */ - private static final int TYPICAL_LONGEST_WORD_IN_INDEX = 19; + /* This should be somewhere around the average long word. + * If it is longer, we waste time and space. If it is shorter, we waste a + * little bit of time growing the array as we encounter longer words. + */ + private static final int TYPICAL_LONGEST_WORD_IN_INDEX = 19; - /* Allows us save time required to create a new array - * everytime similarity is called. - */ - private int[][] d; + /* Allows us save time required to create a new array + * everytime similarity is called. + */ + private int[][] d; - private float similarity; - private boolean endEnum = false; + private float similarity; + private boolean endEnum = false; - private Term searchTerm = null; - private final String field; - private final String text; - private final String prefix; + private Term searchTerm = null; + private final String field; + private final String text; + private final String prefix; - private final float minimumSimilarity; - private final float scale_factor; - private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX]; + private final float minimumSimilarity; + private final float scale_factor; + private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX]; - /** - * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. - * - * @param reader - * @param term - * @throws IOException - * @see #FuzzyTermEnum(IndexReader, Term, float, int) - */ - public FuzzyTermEnum(IndexReader reader, Term term) throws IOException { - this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength); - } + /** + * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. + * + * @param reader + * @param term + * @throws IOException + * @see #FuzzyTermEnum(IndexReader, Term, float, int) + */ + public FuzzyTermEnum(IndexReader reader, Term term) throws IOException { + this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength); + } - /** - * Creates a FuzzyTermEnum with an empty prefix. - * - * @param reader - * @param term - * @param minSimilarity - * @throws IOException - * @see #FuzzyTermEnum(IndexReader, Term, float, int) - */ - public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException { - this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength); - } + /** + * Creates a FuzzyTermEnum with an empty prefix. + * + * @param reader + * @param term + * @param minSimilarity + * @throws IOException + * @see #FuzzyTermEnum(IndexReader, Term, float, int) + */ + public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException { + this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength); + } - /** - * Constructor for enumeration of all terms from specified reader which share a prefix of - * length prefixLength with term and which have a fuzzy similarity > - * minSimilarity. - * - * @param reader Delivers terms. - * @param term Pattern term. - * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. - * @param prefixLength Length of required common prefix. Default value is 0. - * @throws IOException - */ - public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { - super(); - - if (minSimilarity >= 1.0f) - throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); - else if (minSimilarity < 0.0f) - throw new IllegalArgumentException("minimumSimilarity cannot be less than 0"); - if(prefixLength < 0) - throw new IllegalArgumentException("prefixLength cannot be less than 0"); - - this.minimumSimilarity = minSimilarity; - this.scale_factor = 1.0f / (1.0f - minimumSimilarity); - this.searchTerm = term; - this.field = searchTerm.field(); - - //The prefix could be longer than the word. - //It's kind of silly though. It means we must match the entire word. - final int fullSearchTermLength = searchTerm.text().length(); - final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength; - - this.text = searchTerm.text().substring(realPrefixLength); - this.prefix = searchTerm.text().substring(0, realPrefixLength); - - initializeMaxDistances(); - this.d = initDistanceArray(); - - setEnum(reader.terms(new Term(searchTerm.field(), prefix))); - } - - /** - * The termCompare method in FuzzyTermEnum uses Levenshtein distance to - * calculate the distance between the given term and the comparing term. - */ - protected final boolean termCompare(Term term) { - if (field == term.field() && term.text().startsWith(prefix)) { - final String target = term.text().substring(prefix.length()); - this.similarity = similarity(target); - return (similarity > minimumSimilarity); - } - endEnum = true; - return false; - } + /** + * Constructor for enumeration of all terms from specified reader which share a prefix of + * length prefixLength with term and which have a fuzzy similarity > + * minSimilarity. + * + * @param reader Delivers terms. + * @param term Pattern term. + * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. + * @param prefixLength Length of required common prefix. Default value is 0. + * @throws IOException + */ + public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { + super(); - public final float difference() { - return (float)((similarity - minimumSimilarity) * scale_factor); - } - - public final boolean endEnum() { - return endEnum; - } - - /****************************** - * Compute Levenshtein distance - ******************************/ - - /** - * Finds and returns the smallest of three integers - */ - private static final int min(int a, int b, int c) { - final int t = (a < b) ? a : b; - return (t < c) ? t : c; + if (minSimilarity >= 1.0f) + throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); + else if (minSimilarity < 0.0f) + throw new IllegalArgumentException("minimumSimilarity cannot be less than 0"); + if(prefixLength < 0) + throw new IllegalArgumentException("prefixLength cannot be less than 0"); + + this.minimumSimilarity = minSimilarity; + this.scale_factor = 1.0f / (1.0f - minimumSimilarity); + this.searchTerm = term; + this.field = searchTerm.field(); + + //The prefix could be longer than the word. + //It's kind of silly though. It means we must match the entire word. + final int fullSearchTermLength = searchTerm.text().length(); + final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength; + + this.text = searchTerm.text().substring(realPrefixLength); + this.prefix = searchTerm.text().substring(0, realPrefixLength); + + initializeMaxDistances(); + this.d = initDistanceArray(); + + setEnum(reader.terms(new Term(searchTerm.field(), prefix))); + } + + /** + * The termCompare method in FuzzyTermEnum uses Levenshtein distance to + * calculate the distance between the given term and the comparing term. + */ + protected final boolean termCompare(Term term) { + if (field == term.field() && term.text().startsWith(prefix)) { + final String target = term.text().substring(prefix.length()); + this.similarity = similarity(target); + return (similarity > minimumSimilarity); } + endEnum = true; + return false; + } + + public final float difference() { + return (float)((similarity - minimumSimilarity) * scale_factor); + } + + public final boolean endEnum() { + return endEnum; + } + + /****************************** + * Compute Levenshtein distance + ******************************/ + + /** + * Finds and returns the smallest of three integers + */ + private static final int min(int a, int b, int c) { + final int t = (a < b) ? a : b; + return (t < c) ? t : c; + } private final int[][] initDistanceArray(){ return new int[this.text.length() + 1][TYPICAL_LONGEST_WORD_IN_INDEX]; @@ -192,81 +192,79 @@ public final class FuzzyTermEnum extends FilteredTermEnum { * @return the similarity, 0.0 or less indicates that it matches less than the required * threshold and 1.0 indicates that the text and target are identical */ - private synchronized final float similarity(final String target) { - final int m = target.length(); - final int n = text.length(); - if (n == 0) { - //we don't have antyhing to compare. That means if we just add - //the letters for m we get the new word - return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length()); - } - if (m == 0) { - return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length()); - } - - final int maxDistance = getMaxDistance(m); - - if (maxDistance < Math.abs(m-n)) { - //just adding the characters of m to n or vice-versa results in - //too many edits - //for example "pre" length is 3 and "prefixes" length is 8. We can see that - //given this optimal circumstance, the edit distance cannot be less than 5. - //which is 8-3 or more precisesly Math.abs(3-8). - //if our maximum edit distance is 4, than we can discard this word - //without looking at it. - return 0.0f; - } - - //let's make sure we have enough room in our array to do the distance calculations. - if (d[0].length <= m) { - growDistanceArray(m); - } - - // init matrix d - for (int i = 0; i <= n; i++) d[i][0] = i; - for (int j = 0; j <= m; j++) d[0][j] = j; - - // start computing edit distance - for (int i = 1; i <= n; i++) { - int bestPossibleEditDistance = m; - final char s_i = text.charAt(i - 1); - for (int j = 1; j <= m; j++) { - if (s_i != target.charAt(j-1)) { - d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1; - } - else { - d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]); - } - bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i][j]); - } - - //After calculating row i, the best possible edit distance - //can be found by found by finding the smallest value in a given column. - //If the bestPossibleEditDistance is greater than the max distance, abort. - - if (i > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater - //the closest the target can be to the text is just too far away. - //this target is leaving the party early. - return 0.0f; - } - } - - // this will return less than 0.0 when the edit distance is - // greater than the number of characters in the shorter word. - // but this was the formula that was previously used in FuzzyTermEnum, - // so it has not been changed (even though minimumSimilarity must be - // greater than 0.0) - return 1.0f - ((float)d[n][m] / (float) (prefix.length() + Math.min(n, m))); - + private synchronized final float similarity(final String target) { + final int m = target.length(); + final int n = text.length(); + if (n == 0) { + //we don't have antyhing to compare. That means if we just add + //the letters for m we get the new word + return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length()); } + if (m == 0) { + return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length()); + } + + final int maxDistance = getMaxDistance(m); + + if (maxDistance < Math.abs(m-n)) { + //just adding the characters of m to n or vice-versa results in + //too many edits + //for example "pre" length is 3 and "prefixes" length is 8. We can see that + //given this optimal circumstance, the edit distance cannot be less than 5. + //which is 8-3 or more precisesly Math.abs(3-8). + //if our maximum edit distance is 4, than we can discard this word + //without looking at it. + return 0.0f; + } + + //let's make sure we have enough room in our array to do the distance calculations. + if (d[0].length <= m) { + growDistanceArray(m); + } + + // init matrix d + for (int i = 0; i <= n; i++) d[i][0] = i; + for (int j = 0; j <= m; j++) d[0][j] = j; + + // start computing edit distance + for (int i = 1; i <= n; i++) { + int bestPossibleEditDistance = m; + final char s_i = text.charAt(i - 1); + for (int j = 1; j <= m; j++) { + if (s_i != target.charAt(j-1)) { + d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1; + } + else { + d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]); + } + bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i][j]); + } + + //After calculating row i, the best possible edit distance + //can be found by found by finding the smallest value in a given column. + //If the bestPossibleEditDistance is greater than the max distance, abort. + + if (i > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater + //the closest the target can be to the text is just too far away. + //this target is leaving the party early. + return 0.0f; + } + } + + // this will return less than 0.0 when the edit distance is + // greater than the number of characters in the shorter word. + // but this was the formula that was previously used in FuzzyTermEnum, + // so it has not been changed (even though minimumSimilarity must be + // greater than 0.0) + return 1.0f - ((float)d[n][m] / (float) (prefix.length() + Math.min(n, m))); + } /** * Grow the second dimension of the array, so that we can calculate the * Levenshtein difference. */ private void growDistanceArray(int m) { - for (int i = 0; i < d.length; i++) - { + for (int i = 0; i < d.length; i++) { d[i] = new int[m+1]; } } @@ -283,8 +281,7 @@ public final class FuzzyTermEnum extends FilteredTermEnum { } private void initializeMaxDistances() { - for (int i = 0; i < maxDistances.length; i++) - { + for (int i = 0; i < maxDistances.length; i++) { maxDistances[i] = calculateMaxDistance(i); } }