PERF: improve `findAllMatches` speed (#22083)
When we introduced unicode support in the regular expressions used in watched words (9a27803), we didn't realize how costly adding the `u` flag would be.
Turns out, it's pretty bad when you have lots of regular expressions to test. A customer had slightly fewer than 200 watched words, and it would freeze the browser for about 2s on the first check of those regular expressions (roughly 10ms per regular expression).
This commit introduces a new field (`word`) to the serialized watched words which is then converted to a very fast and cheap regular expression on the client-side. We use that regexp to quickly check whether a matcher is even worth trying so that we don't incur the cost of compiling the expensive unicode regexp.
This commit also busts the `WordWatcher` cache since we added a new field to be serialized.
One nice side effect of using `matchAll` instead of a `while / exec` loop is that the likelihood of a bad regexp matching infinitely is vastly reduced 🙌
This commit is contained in:
parent
367b3be035
commit
4cb3412a56
|
@ -1683,6 +1683,7 @@ var bar = 'bar';
|
||||||
const opts = {
|
const opts = {
|
||||||
watchedWordsReplace: {
|
watchedWordsReplace: {
|
||||||
"(?:\\W|^)(fun)(?=\\W|$)": {
|
"(?:\\W|^)(fun)(?=\\W|$)": {
|
||||||
|
word: "fun",
|
||||||
replacement: "times",
|
replacement: "times",
|
||||||
case_sensitive: false,
|
case_sensitive: false,
|
||||||
},
|
},
|
||||||
|
@ -1697,6 +1698,7 @@ var bar = 'bar';
|
||||||
const opts = {
|
const opts = {
|
||||||
watchedWordsLink: {
|
watchedWordsLink: {
|
||||||
"(?:\\W|^)(fun)(?=\\W|$)": {
|
"(?:\\W|^)(fun)(?=\\W|$)": {
|
||||||
|
word: "fun",
|
||||||
replacement: "https://discourse.org",
|
replacement: "https://discourse.org",
|
||||||
case_sensitive: false,
|
case_sensitive: false,
|
||||||
},
|
},
|
||||||
|
@ -1711,18 +1713,21 @@ var bar = 'bar';
|
||||||
});
|
});
|
||||||
|
|
||||||
test("watched words replace with bad regex", function (assert) {
|
test("watched words replace with bad regex", function (assert) {
|
||||||
const maxMatches = 100; // same limit as MD watched-words-replace plugin
|
|
||||||
const opts = {
|
const opts = {
|
||||||
siteSettings: { watched_words_regular_expressions: true },
|
siteSettings: { watched_words_regular_expressions: true },
|
||||||
watchedWordsReplace: {
|
watchedWordsReplace: {
|
||||||
"(\\bu?\\b)": { replacement: "you", case_sensitive: false },
|
"(\\bu?\\b)": {
|
||||||
|
word: "(\\bu?\\b)",
|
||||||
|
replacement: "you",
|
||||||
|
case_sensitive: false,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
assert.cookedOptions(
|
assert.cookedOptions(
|
||||||
"one",
|
"one",
|
||||||
opts,
|
opts,
|
||||||
`<p>${"you".repeat(maxMatches)}one</p>`,
|
`<p>youoneyou</p>`,
|
||||||
"does not loop infinitely"
|
"does not loop infinitely"
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
|
@ -16,22 +16,26 @@ function isLinkClose(str) {
|
||||||
function findAllMatches(text, matchers) {
|
function findAllMatches(text, matchers) {
|
||||||
const matches = [];
|
const matches = [];
|
||||||
|
|
||||||
let count = 0;
|
for (const { word, pattern, replacement, link } of matchers) {
|
||||||
|
if (matches.length >= MAX_MATCHES) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
matchers.forEach((matcher) => {
|
if (word.test(text)) {
|
||||||
let match;
|
for (const match of text.matchAll(pattern)) {
|
||||||
while (
|
|
||||||
(match = matcher.pattern.exec(text)) !== null &&
|
|
||||||
count++ < MAX_MATCHES
|
|
||||||
) {
|
|
||||||
matches.push({
|
matches.push({
|
||||||
index: match.index + match[0].indexOf(match[1]),
|
index: match.index + match[0].indexOf(match[1]),
|
||||||
text: match[1],
|
text: match[1],
|
||||||
replacement: matcher.replacement,
|
replacement,
|
||||||
link: matcher.link,
|
link,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (matches.length >= MAX_MATCHES) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
return matches.sort((a, b) => a.index - b.index);
|
return matches.sort((a, b) => a.index - b.index);
|
||||||
}
|
}
|
||||||
|
@ -52,11 +56,12 @@ export function setup(helper) {
|
||||||
const matchers = [];
|
const matchers = [];
|
||||||
|
|
||||||
if (md.options.discourse.watchedWordsReplace) {
|
if (md.options.discourse.watchedWordsReplace) {
|
||||||
Object.entries(md.options.discourse.watchedWordsReplace).map(
|
Object.entries(md.options.discourse.watchedWordsReplace).forEach(
|
||||||
([regexpString, options]) => {
|
([regexpString, options]) => {
|
||||||
const word = toWatchedWord({ [regexpString]: options });
|
const word = toWatchedWord({ [regexpString]: options });
|
||||||
|
|
||||||
matchers.push({
|
matchers.push({
|
||||||
|
word: new RegExp(options.word, options.case_sensitive ? "" : "i"),
|
||||||
pattern: createWatchedWordRegExp(word),
|
pattern: createWatchedWordRegExp(word),
|
||||||
replacement: options.replacement,
|
replacement: options.replacement,
|
||||||
link: false,
|
link: false,
|
||||||
|
@ -66,11 +71,12 @@ export function setup(helper) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (md.options.discourse.watchedWordsLink) {
|
if (md.options.discourse.watchedWordsLink) {
|
||||||
Object.entries(md.options.discourse.watchedWordsLink).map(
|
Object.entries(md.options.discourse.watchedWordsLink).forEach(
|
||||||
([regexpString, options]) => {
|
([regexpString, options]) => {
|
||||||
const word = toWatchedWord({ [regexpString]: options });
|
const word = toWatchedWord({ [regexpString]: options });
|
||||||
|
|
||||||
matchers.push({
|
matchers.push({
|
||||||
|
word: new RegExp(options.word, options.case_sensitive ? "" : "i"),
|
||||||
pattern: createWatchedWordRegExp(word),
|
pattern: createWatchedWordRegExp(word),
|
||||||
replacement: options.replacement,
|
replacement: options.replacement,
|
||||||
link: true,
|
link: true,
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
class WordWatcher
|
class WordWatcher
|
||||||
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
REPLACEMENT_LETTER ||= CGI.unescape_html("■")
|
||||||
CACHE_VERSION = 2
|
CACHE_VERSION ||= 3
|
||||||
|
|
||||||
def initialize(raw)
|
def initialize(raw)
|
||||||
@raw = raw
|
@raw = raw
|
||||||
|
@ -24,8 +24,9 @@ class WordWatcher
|
||||||
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
|
.limit(WatchedWord::MAX_WORDS_PER_ACTION)
|
||||||
.order(:id)
|
.order(:id)
|
||||||
.pluck(:word, :replacement, :case_sensitive)
|
.pluck(:word, :replacement, :case_sensitive)
|
||||||
.map { |w, r, c| [w, { replacement: r, case_sensitive: c }.compact] }
|
.to_h do |w, r, c|
|
||||||
.to_h
|
[w, { word: word_to_regexp(w, whole: false), replacement: r, case_sensitive: c }.compact]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.words_for_action_exists?(action)
|
def self.words_for_action_exists?(action)
|
||||||
|
@ -78,9 +79,7 @@ class WordWatcher
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.word_matcher_regexps(action, engine: :ruby)
|
def self.word_matcher_regexps(action, engine: :ruby)
|
||||||
if words = get_cached_words(action)
|
get_cached_words(action)&.to_h { |word, attrs| [word_to_regexp(word, engine: engine), attrs] }
|
||||||
words.map { |word, attrs| [word_to_regexp(word, engine: engine), attrs] }.to_h
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.word_to_regexp(word, engine: :ruby, whole: true)
|
def self.word_to_regexp(word, engine: :ruby, whole: true)
|
||||||
|
|
|
@ -21,9 +21,11 @@ RSpec.describe WordWatcher do
|
||||||
expect(described_class.words_for_action(:block)).to include(
|
expect(described_class.words_for_action(:block)).to include(
|
||||||
word1 => {
|
word1 => {
|
||||||
case_sensitive: false,
|
case_sensitive: false,
|
||||||
|
word: word1,
|
||||||
},
|
},
|
||||||
word2 => {
|
word2 => {
|
||||||
case_sensitive: true,
|
case_sensitive: true,
|
||||||
|
word: word2,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
@ -40,6 +42,7 @@ RSpec.describe WordWatcher do
|
||||||
word => {
|
word => {
|
||||||
case_sensitive: false,
|
case_sensitive: false,
|
||||||
replacement: "http://test.localhost/",
|
replacement: "http://test.localhost/",
|
||||||
|
word: word,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue