From 502f15a89b41be94e07834e7a730aee464560a0a Mon Sep 17 00:00:00 2001 From: ThomasDC <1496708+thomasdc@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:28:07 +0100 Subject: [PATCH] Let WordDelimiterGraphFilterFactory propagate ignoreKeywords flag (#12525) * Let WordDelimiterGraphFilterFactory propagate ignoreKeywords flag fixes https://github.com/apache/lucene/issues/12522 * Document changes * Align with default in code --- lucene/CHANGES.txt | 2 ++ .../miscellaneous/WordDelimiterGraphFilterFactory.java | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b0a53f9e2ce..555492406ee 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -404,6 +404,8 @@ New Features * GITHUB#12479: Add new Maximum Inner Product vector similarity function for non-normalized dot-product vector search. (Jack Mazanec, Ben Trent) +* GITHUB#12525: `WordDelimiterGraphFilterFactory` now supports the `ignoreKeywords` flag (Thomas De Craemer) + * GITHUB#12489: Add support for recursive graph bisection, also called bipartite graph partitioning, and often abbreviated BP, an algorithm for reordering doc IDs that results in more compact postings and faster queries, diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilterFactory.java index 09ec073bd9b..63634687e2b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterGraphFilterFactory.java @@ -45,7 +45,7 @@ import org.apache.lucene.util.ResourceLoaderAware; * preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1" * catenateWords="0" catenateNumbers="0" catenateAll="0" * generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1" - * types="wdfftypes.txt" /> + * types="wdfftypes.txt" ignoreKeywords="0" /> * </analyzer> * </fieldType> * @@ -100,6 +100,9 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory if (getInt(args, "stemEnglishPossessive", 1) != 0) { flags |= STEM_ENGLISH_POSSESSIVE; } + if (getInt(args, "ignoreKeywords", 0) != 0) { + flags |= IGNORE_KEYWORDS; + } wordFiles = get(args, PROTECTED_TOKENS); types = get(args, TYPES); this.flags = flags;