LUCENE-9575: fix style violations so precommit passes again

2021-01-24 08:06:50 -05:00 · 2021-01-24 08:06:50 -05:00 · 93107d6379
parent c087f6f8c0
commit 93107d6379
4 changed files with 109 additions and 82 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java
@@ -17,22 +17,22 @@

 package org.apache.lucene.analysis.pattern;

+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 /**
- * Set a type attribute to a parameterized value when tokens are matched by any of a several regex patterns. The
- * value set in the type attribute is parameterized with the match groups of the regex used for matching.
- * In combination with TypeAsSynonymFilter and DropIfFlagged filter this can supply complex synonym patterns
- * that are protected from subsequent analysis, and optionally drop the original term based on the flag
- * set in this filter. See {@link PatternTypingFilterFactory} for full documentation.
+ * Set a type attribute to a parameterized value when tokens are matched by any of several regex
+ * patterns. The value set in the type attribute is parameterized with the match groups of the regex
+ * used for matching. In combination with TypeAsSynonymFilter and DropIfFlagged filter this can
+ * supply complex synonym patterns that are protected from subsequent analysis, and optionally drop
+ * the original term based on the flag set in this filter. See {@link PatternTypingFilterFactory}
+ * for full documentation.
 *
 * @see PatternTypingFilterFactory
 * @since 8.8.0
@@ -55,7 +55,8 @@ public class PatternTypingFilter extends TokenFilter {
      for (PatternTypingRule rule : replacementAndFlagByPattern) {
        Matcher matcher = rule.getPattern().matcher(termAtt);
        if (matcher.find()) {
-          // allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string creation
+          // allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string
+          // creation
          typeAtt.setType(matcher.replaceFirst(rule.getTypeTemplate()));
          flagAtt.setFlags(rule.getFlags());
          return true;
@@ -66,9 +67,7 @@ public class PatternTypingFilter extends TokenFilter {
    return false;
  }

-  /**
-   * Value holding class for pattern typing rules.
-   */
+  /** Value holding class for pattern typing rules. */
  public static class PatternTypingRule {
    private final Pattern pattern;
    private final int flags;
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java
@@ -17,22 +17,21 @@

 package org.apache.lucene.analysis.pattern;

+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
 import org.apache.lucene.analysis.TokenFilterFactory;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;

-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-
 /**
- * Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By itself this
- * filter is not very useful. Normally it is combined with a filter that reacts to types or flags.
+ * Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By
+ * itself this filter is not very useful. Normally it is combined with a filter that reacts to types
+ * or flags.
 *
 * <pre class="prettyprint" >
 * &lt;fieldType name="text_taf" class="solr.TextField" positionIncrementGap="100"&gt;
@@ -44,37 +43,38 @@ import java.util.regex.Pattern;
 *               ignore="word,&amp;lt;ALPHANUM&amp;gt;,&amp;lt;NUM&amp;gt;,&amp;lt;SOUTHEAST_ASIAN&amp;gt;,&amp;lt;IDEOGRAPHIC&amp;gt;,&amp;lt;HIRAGANA&amp;gt;,&amp;lt;KATAKANA&amp;gt;,&amp;lt;HANGUL&amp;gt;,&amp;lt;EMOJI&amp;gt;"/&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;</pre>
- * <p>
- * Note that a configuration such as above may interfere with multi-word synonyms. The patterns file has the format:
+ *
+ * <p>Note that a configuration such as above may interfere with multi-word synonyms. The patterns
+ * file has the format:
+ *
 * <pre>
 * (flags) (pattern) ::: (replacement)
 * </pre>
- * Therefore to set the first 2 flag bits on the original token matching 401k or 401(k) and adding a type of
- * 'legal2_401_k' whenever either one is encountered one would use:
+ *
+ * Therefore to set the first 2 flag bits on the original token matching 401k or 401(k) and adding a
+ * type of 'legal2_401_k' whenever either one is encountered one would use:
+ *
 * <pre>
 * 3 (\d+)\(?([a-z])\)? ::: legal2_$1_$2
 * </pre>
- * Note that the number indicating the flag bits to set must not have leading spaces and be followed by a single
- * space, and must be 0 if no flags should be set. The flags number should not contain commas or a decimal point.
- * Lines for which the first character is <code>#</code> will be ignored as comments.  Does not support producing
- * a synonym textually identical to the original term.
+ *
+ * Note that the number indicating the flag bits to set must not have leading spaces and be followed
+ * by a single space, and must be 0 if no flags should be set. The flags number should not contain
+ * commas or a decimal point. Lines for which the first character is <code>#</code> will be ignored
+ * as comments. Does not support producing a synonym textually identical to the original term.
 *
 * @lucene.spi {@value #NAME}
 * @since 8.8
 */
 public class PatternTypingFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {

-  /**
-   * SPI name
-   */
+  /** SPI name */
  public static final String NAME = "patternTyping";

  private final String patternFile;
  private PatternTypingRule[] rules;

-  /**
-   * Creates a new PatternTypingFilterFactory
-   */
+  /** Creates a new PatternTypingFilterFactory */
  public PatternTypingFilterFactory(Map<String, String> args) {
    super(args);
    patternFile = require(args, "patternFile");
@@ -83,9 +83,7 @@ public class PatternTypingFilterFactory extends TokenFilterFactory implements Re
    }
  }

-  /**
-   * Default ctor for compatibility with SPI
-   */
+  /** Default ctor for compatibility with SPI */
  public PatternTypingFilterFactory() {
    throw defaultCtorException();
  }
@@ -94,16 +92,19 @@ public class PatternTypingFilterFactory extends TokenFilterFactory implements Re
  public void inform(ResourceLoader loader) throws IOException {
    List<PatternTypingRule> ruleList = new ArrayList<>();
    List<String> lines = getLines(loader, patternFile);
-    // format: # regex ::: typename[_$1[_$2 ...]]    (technically _$1 does not need the '_' but it usually makes sense)
+    // format: # regex ::: typename[_$1[_$2 ...]]    (technically _$1 does not need the '_' but it
+    // usually makes sense)
    // eg: 2 (\d+\(?([a-z])\)?\(?(\d+)\)? ::: legal3_$1_$2_3
    // which yields legal3_501_c_3 for 501(c)(3) or 501c3 and sets the second lowest bit in flags
    for (String line : lines) {
      int firstSpace = line.indexOf(" "); // no leading spaces allowed
      int flagsVal = Integer.parseInt(line.substring(0, firstSpace));
      line = line.substring(firstSpace + 1);
-      String[] split = line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
+      String[] split =
+          line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
      if (split.length != 2) {
-        throw new RuntimeException("The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
+        throw new RuntimeException(
+            "The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
      }
      Pattern compiled = Pattern.compile(split[0]);
      ruleList.add(new PatternTypingRule(compiled, flagsVal, split[1]));
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java
@@ -16,23 +16,18 @@
 */
 package org.apache.lucene.analysis.pattern;

+import java.io.IOException;
+import java.util.regex.Pattern;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;

-import java.io.IOException;
-import java.util.regex.Pattern;
-
-/**
- * Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file
- */
+/** Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file */
 public class TestPatternTypingFilter extends BaseTokenStreamTestCase {

-  /**
-   * Test the straight forward cases. When all flags match the token should be dropped
-   */
+  /** Test the straightforward cases. When all flags match the token should be dropped */
  public void testPatterns() throws Exception {

    Token tokenA1 = new Token("One", 0, 2);
@@ -44,13 +39,24 @@ public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
    TokenStream ts = new CannedTokenStream(tokenA1, tokenA2, tokenA3, tokenB1, tokenB2);

    // 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
-    ts = new PatternTypingFilter(ts,
+    ts =
+        new PatternTypingFilter(
+            ts,
            new PatternTypingRule(Pattern.compile("^(\\d+)\\(?([a-z])\\)?$"), 2, "legal2_$1_$2"));

-    assertTokenStreamContents(ts, new String[]{
-            "One", "401(k)", "two", "three", "401k"}, null, null,
+    assertTokenStreamContents(
+        ts,
+        new String[] {"One", "401(k)", "two", "three", "401k"},
+        null,
+        null,
        new String[] {"word", "legal2_401_k", "word", "word", "legal2_401_k"},
-        null, null, null, null, null, false, null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        false,
+        null,
        new int[] {0, 2, 0, 0, 2});
  }

@@ -62,16 +68,26 @@ public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
    TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);

    // 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
-    PatternTypingRule p1 = new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
-    PatternTypingRule p2 = new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");
+    PatternTypingRule p1 =
+        new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
+    PatternTypingRule p2 =
+        new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");

    ts = new PatternTypingFilter(ts, p1, p2); // 101

-    assertTokenStreamContents(ts, new String[]{
-            "One", "forty-two", "4-2"}, null, null,
+    assertTokenStreamContents(
+        ts,
+        new String[] {"One", "forty-two", "4-2"},
+        null,
+        null,
        new String[] {"word", "forty_hword_two", "4_hnum_2"},
-        null, null, null, null, null, false, null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        false,
+        null,
        new int[] {0, 2, 6});
  }
-
 }
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java
@@ -25,9 +25,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
 import org.apache.lucene.util.Version;

-/**
- * This test just ensures the factory works
- */
+/** This test just ensures the factory works */
 public class TestPatternTypingFilterFactory extends BaseTokenStreamFactoryTestCase {

  public void testFactory() throws Exception {
@@ -37,16 +35,29 @@ public class TestPatternTypingFilterFactory extends BaseTokenStreamFactoryTestCa

    TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);

-    TokenFilterFactory tokenFilterFactory = tokenFilterFactory("patternTyping", Version.LATEST, new StringMockResourceLoader(
-        "6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" +
-        "2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"
-    ), "patternFile", "patterns.txt");
+    TokenFilterFactory tokenFilterFactory =
+        tokenFilterFactory(
+            "patternTyping",
+            Version.LATEST,
+            new StringMockResourceLoader(
+                "6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" + "2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"),
+            "patternFile",
+            "patterns.txt");

    ts = tokenFilterFactory.create(ts);
-    assertTokenStreamContents(ts, new String[]{
-            "One", "forty-two", "4-2"}, null, null,
+    assertTokenStreamContents(
+        ts,
+        new String[] {"One", "forty-two", "4-2"},
+        null,
+        null,
        new String[] {"word", "forty_hword_two", "4_hnum_2"},
-        null, null, null, null, null, false, null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        false,
+        null,
        new int[] {0, 2, 6});
  }
 }