indent the same everywhere, no functional change

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150630 13f79535-47bb-0310-9956-ffa450edef68
2025-03-06 08:19:23 +00:00 · 2004-11-07 23:41:50 +00:00 · 2004-11-07 23:41:50 +00:00 · 2c1dc30639
commit 2c1dc30639
parent 6f3bf4837d
1 changed files with 179 additions and 182 deletions
--- a/src/java/org/apache/lucene/search/FuzzyTermEnum.java
+++ b/src/java/org/apache/lucene/search/FuzzyTermEnum.java
@ -29,127 +29,127 @@ import java.io.IOException;
 */
 public final class FuzzyTermEnum extends FilteredTermEnum {

-    /* This should be somewhere around the average long word.
-     * If it is longer, we waste time and space. If it is shorter, we waste a
-     * little bit of time growing the array as we encounter longer words.
-     */
-    private static final int TYPICAL_LONGEST_WORD_IN_INDEX = 19;
+  /* This should be somewhere around the average long word.
+   * If it is longer, we waste time and space. If it is shorter, we waste a
+   * little bit of time growing the array as we encounter longer words.
+   */
+  private static final int TYPICAL_LONGEST_WORD_IN_INDEX = 19;

-    /* Allows us save time required to create a new array
-     * everytime similarity is called.
-     */
-    private int[][] d;
+  /* Allows us save time required to create a new array
+   * everytime similarity is called.
+   */
+  private int[][] d;

-    private float similarity;
-    private boolean endEnum = false;
+  private float similarity;
+  private boolean endEnum = false;

-    private Term searchTerm = null;
-    private final String field;
-    private final String text;
-    private final String prefix;
+  private Term searchTerm = null;
+  private final String field;
+  private final String text;
+  private final String prefix;

-    private final float minimumSimilarity;
-    private final float scale_factor;
-    private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX];
+  private final float minimumSimilarity;
+  private final float scale_factor;
+  private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX];

-    /**
-     * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
-     * 
-     * @param reader
-     * @param term
-     * @throws IOException
-     * @see #FuzzyTermEnum(IndexReader, Term, float, int)
-     */
-    public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
-      this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength);
-    }
+  /**
+   * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
+   * 
+   * @param reader
+   * @param term
+   * @throws IOException
+   * @see #FuzzyTermEnum(IndexReader, Term, float, int)
+   */
+  public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
+    this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength);
+  }
    
-    /**
-     * Creates a FuzzyTermEnum with an empty prefix.
-     * 
-     * @param reader
-     * @param term
-     * @param minSimilarity
-     * @throws IOException
-     * @see #FuzzyTermEnum(IndexReader, Term, float, int)
-     */
-    public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
-      this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength);
-    }
+  /**
+   * Creates a FuzzyTermEnum with an empty prefix.
+   * 
+   * @param reader
+   * @param term
+   * @param minSimilarity
+   * @throws IOException
+   * @see #FuzzyTermEnum(IndexReader, Term, float, int)
+   */
+  public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
+    this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength);
+  }
    
-    /**
-     * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
-     * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
-     * <code>minSimilarity</code>. 
-     * 
-     * @param reader Delivers terms.
-     * @param term Pattern term.
-     * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
-     * @param prefixLength Length of required common prefix. Default value is 0.
-     * @throws IOException
-     */
-    public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
-        super();
-        
-        if (minSimilarity >= 1.0f)
-          throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1");
-        else if (minSimilarity < 0.0f)
-          throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
-        if(prefixLength < 0)
-          throw new IllegalArgumentException("prefixLength cannot be less than 0");
-
-        this.minimumSimilarity = minSimilarity;
-        this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
-        this.searchTerm = term;
-        this.field = searchTerm.field();
-
-        //The prefix could be longer than the word.
-        //It's kind of silly though.  It means we must match the entire word.
-        final int fullSearchTermLength = searchTerm.text().length();
-        final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength;
-
-        this.text = searchTerm.text().substring(realPrefixLength);
-        this.prefix = searchTerm.text().substring(0, realPrefixLength);
-
-        initializeMaxDistances();
-        this.d = initDistanceArray();
-
-        setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
-    }
-
-    /**
-     * The termCompare method in FuzzyTermEnum uses Levenshtein distance to 
-     * calculate the distance between the given term and the comparing term. 
-     */
-    protected final boolean termCompare(Term term) {
-        if (field == term.field() && term.text().startsWith(prefix)) {
-            final String target = term.text().substring(prefix.length());
-            this.similarity = similarity(target);
-            return (similarity > minimumSimilarity);
-        }
-        endEnum = true;
-        return false;
-    }
+  /**
+   * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
+   * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
+   * <code>minSimilarity</code>. 
+   * 
+   * @param reader Delivers terms.
+   * @param term Pattern term.
+   * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
+   * @param prefixLength Length of required common prefix. Default value is 0.
+   * @throws IOException
+   */
+  public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
+    super();
    
-    public final float difference() {
-        return (float)((similarity - minimumSimilarity) * scale_factor);
-    }
-    
-    public final boolean endEnum() {
-        return endEnum;
-    }
-    
-    /******************************
-     * Compute Levenshtein distance
-     ******************************/
-    
-    /**
-     * Finds and returns the smallest of three integers 
-     */
-    private static final int min(int a, int b, int c) {
-        final int t = (a < b) ? a : b;
-        return (t < c) ? t : c;
+    if (minSimilarity >= 1.0f)
+      throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1");
+    else if (minSimilarity < 0.0f)
+      throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
+    if(prefixLength < 0)
+      throw new IllegalArgumentException("prefixLength cannot be less than 0");
+
+    this.minimumSimilarity = minSimilarity;
+    this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
+    this.searchTerm = term;
+    this.field = searchTerm.field();
+
+    //The prefix could be longer than the word.
+    //It's kind of silly though.  It means we must match the entire word.
+    final int fullSearchTermLength = searchTerm.text().length();
+    final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength;
+
+    this.text = searchTerm.text().substring(realPrefixLength);
+    this.prefix = searchTerm.text().substring(0, realPrefixLength);
+
+    initializeMaxDistances();
+    this.d = initDistanceArray();
+
+    setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
+  }
+
+  /**
+   * The termCompare method in FuzzyTermEnum uses Levenshtein distance to 
+   * calculate the distance between the given term and the comparing term. 
+   */
+  protected final boolean termCompare(Term term) {
+    if (field == term.field() && term.text().startsWith(prefix)) {
+        final String target = term.text().substring(prefix.length());
+        this.similarity = similarity(target);
+        return (similarity > minimumSimilarity);
    }
+    endEnum = true;
+    return false;
+  }
+  
+  public final float difference() {
+    return (float)((similarity - minimumSimilarity) * scale_factor);
+  }
+  
+  public final boolean endEnum() {
+    return endEnum;
+  }
+  
+  /******************************
+   * Compute Levenshtein distance
+   ******************************/
+  
+  /**
+   * Finds and returns the smallest of three integers 
+   */
+  private static final int min(int a, int b, int c) {
+    final int t = (a < b) ? a : b;
+    return (t < c) ? t : c;
+  }

  private final int[][] initDistanceArray(){
    return new int[this.text.length() + 1][TYPICAL_LONGEST_WORD_IN_INDEX];
@ -192,81 +192,79 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
   * @return the similarity,  0.0 or less indicates that it matches less than the required
   * threshold and 1.0 indicates that the text and target are identical
   */
-    private synchronized final float similarity(final String target) {
-        final int m = target.length();
-        final int n = text.length();
-        if (n == 0)  {
-          //we don't have antyhing to compare.  That means if we just add
-          //the letters for m we get the new word
-          return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length());
-        }
-        if (m == 0) {
-          return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length());
-        }
-
-        final int maxDistance = getMaxDistance(m);
-
-        if (maxDistance < Math.abs(m-n)) {
-          //just adding the characters of m to n or vice-versa results in
-          //too many edits
-          //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
-          //given this optimal circumstance, the edit distance cannot be less than 5.
-          //which is 8-3 or more precisesly Math.abs(3-8).
-          //if our maximum edit distance is 4, than we can discard this word
-          //without looking at it.
-          return 0.0f;
-        }
-
-        //let's make sure we have enough room in our array to do the distance calculations.
-        if (d[0].length <= m) {
-          growDistanceArray(m);
-        }
-
-        // init matrix d
-        for (int i = 0; i <= n; i++) d[i][0] = i;
-        for (int j = 0; j <= m; j++) d[0][j] = j;
-        
-        // start computing edit distance
-        for (int i = 1; i <= n; i++) {
-            int bestPossibleEditDistance = m;
-            final char s_i = text.charAt(i - 1);
-            for (int j = 1; j <= m; j++) {
-                if (s_i != target.charAt(j-1)) {
-                    d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
-                }
-                else {
-                  d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
-                }
-                bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i][j]);
-            }
-
-          //After calculating row i, the best possible edit distance
-          //can be found by found by finding the smallest value in a given column.
-          //If the bestPossibleEditDistance is greater than the max distance, abort.
-
-          if (i > maxDistance && bestPossibleEditDistance > maxDistance) {  //equal is okay, but not greater
-            //the closest the target can be to the text is just too far away.
-            //this target is leaving the party early.
-            return 0.0f;
-          }
-        }
-
-        // this will return less than 0.0 when the edit distance is
-        // greater than the number of characters in the shorter word.
-        // but this was the formula that was previously used in FuzzyTermEnum,
-        // so it has not been changed (even though minimumSimilarity must be
-        // greater than 0.0)
-        return 1.0f - ((float)d[n][m] / (float) (prefix.length() + Math.min(n, m)));
-
+  private synchronized final float similarity(final String target) {
+    final int m = target.length();
+    final int n = text.length();
+    if (n == 0)  {
+      //we don't have antyhing to compare.  That means if we just add
+      //the letters for m we get the new word
+      return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length());
    }
+    if (m == 0) {
+      return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length());
+    }
+
+    final int maxDistance = getMaxDistance(m);
+
+    if (maxDistance < Math.abs(m-n)) {
+      //just adding the characters of m to n or vice-versa results in
+      //too many edits
+      //for example "pre" length is 3 and "prefixes" length is 8.  We can see that
+      //given this optimal circumstance, the edit distance cannot be less than 5.
+      //which is 8-3 or more precisesly Math.abs(3-8).
+      //if our maximum edit distance is 4, than we can discard this word
+      //without looking at it.
+      return 0.0f;
+    }
+
+    //let's make sure we have enough room in our array to do the distance calculations.
+    if (d[0].length <= m) {
+      growDistanceArray(m);
+    }
+
+    // init matrix d
+    for (int i = 0; i <= n; i++) d[i][0] = i;
+    for (int j = 0; j <= m; j++) d[0][j] = j;
+    
+    // start computing edit distance
+    for (int i = 1; i <= n; i++) {
+      int bestPossibleEditDistance = m;
+      final char s_i = text.charAt(i - 1);
+      for (int j = 1; j <= m; j++) {
+        if (s_i != target.charAt(j-1)) {
+            d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
+        }
+        else {
+          d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
+        }
+        bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i][j]);
+      }
+
+      //After calculating row i, the best possible edit distance
+      //can be found by found by finding the smallest value in a given column.
+      //If the bestPossibleEditDistance is greater than the max distance, abort.
+
+      if (i > maxDistance && bestPossibleEditDistance > maxDistance) {  //equal is okay, but not greater
+        //the closest the target can be to the text is just too far away.
+        //this target is leaving the party early.
+        return 0.0f;
+      }
+    }
+
+    // this will return less than 0.0 when the edit distance is
+    // greater than the number of characters in the shorter word.
+    // but this was the formula that was previously used in FuzzyTermEnum,
+    // so it has not been changed (even though minimumSimilarity must be
+    // greater than 0.0)
+    return 1.0f - ((float)d[n][m] / (float) (prefix.length() + Math.min(n, m)));
+  }

  /**
   * Grow the second dimension of the array, so that we can calculate the
   * Levenshtein difference.
   */
  private void growDistanceArray(int m) {
-    for (int i = 0; i < d.length; i++)
-    {
+    for (int i = 0; i < d.length; i++) {
      d[i] = new int[m+1];
    }
  }
@ -283,8 +281,7 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
  }

  private void initializeMaxDistances() {
-    for (int i = 0; i < maxDistances.length; i++)
-    {
+    for (int i = 0; i < maxDistances.length; i++) {
      maxDistances[i] = calculateMaxDistance(i);
    }
  }