push: Allow wildcards when matching words

This commit is contained in:
Kévin Commaille 2022-06-23 15:57:54 +02:00 committed by GitHub
parent 33e1a20c4b
commit d192184b3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 126 additions and 33 deletions

View File

@ -4,6 +4,9 @@ Bug fixes:
* Expose `MatrixIdError`, `MatrixToError`, `MatrixUriError` and `MxcUriError` at
the crate root
* Allow wildcards for push conditions on `content.body`
* The spec clarified the behavior of the `event_match` condition:
<https://github.com/matrix-org/matrix-spec-proposals/pull/3690>
Breaking changes:

View File

@ -61,6 +61,7 @@ js_option = "0.1.0"
percent-encoding = "2.1.0"
pulldown-cmark = { version = "0.9.1", default-features = false, optional = true }
rand_crate = { package = "rand", version = "0.8.3", optional = true }
regex = { version = "1.5.6", default-features = false, features = ["std", "perf"] }
ruma-identifiers-validation = { version = "0.8.1", path = "../ruma-identifiers-validation", default-features = false }
ruma-macros = { version = "0.9.2", path = "../ruma-macros" }
serde = { version = "1.0.118", features = ["derive"] }

View File

@ -1,6 +1,7 @@
use std::{collections::BTreeMap, ops::RangeBounds, str::FromStr};
use js_int::{Int, UInt};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{to_value as to_json_value, value::Value as JsonValue};
use tracing::{instrument, warn};
@ -12,6 +13,13 @@ mod room_member_count_is;
pub use room_member_count_is::{ComparisonOperator, RoomMemberCountIs};
/// The characters that are defined as a word boundary in the [Matrix spec].
///
/// Any character not in the sets `[A-Z]`, `[a-z]`, `[0-9]` or `_`.
///
/// The value is written as a negated regex character class so that it can be interpolated
/// as-is into a regex pattern.
///
/// [Matrix spec]: https://spec.matrix.org/v1.3/client-server-api/#conditions-1
const WORD_BOUNDARY_CHARACTERS: &str = "[^A-Za-z0-9_]";
/// A condition that must apply for an associated push rule's action to be taken.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(not(feature = "unstable-exhaustive-types"), non_exhaustive)]
@ -169,17 +177,27 @@ trait StrExt {
/// Matches this string against `pattern`.
///
/// The pattern can be a glob with wildcards `*` and `?`.
///
/// The match is case insensitive.
///
/// If `match_words` is `true`, looks for `pattern` as a substring of `self`,
/// and checks that it is separated from other words. Otherwise, checks
/// `pattern` as a glob with wildcards `*` and `?`.
/// If `match_words` is `true`, checks that the pattern is separated from other words.
fn matches_pattern(&self, pattern: &str, match_words: bool) -> bool;
/// Matches this string against `pattern`, with word boundaries.
///
/// The pattern can be a glob with wildcards `*` and `?`.
///
/// A word boundary is defined as the start or end of the value, or any character not in the
/// sets `[A-Z]`, `[a-z]`, `[0-9]` or `_`.
///
/// The match is case sensitive.
fn matches_word(&self, pattern: &str) -> bool;
/// Translate the wildcards in `self` to regex syntax.
///
/// `self` must only contain wildcards.
fn wildcards_to_regex(&self) -> String;
}
impl StrExt for str {
@ -229,40 +247,92 @@ impl StrExt for str {
return false;
}
match self.find(pattern) {
Some(start) => {
let end = start + pattern.len();
let has_wildcards = pattern.contains(|c| matches!(c, '?' | '*'));
// Look if the match has word boundaries.
let word_boundary_start = !self.char_at(start).is_word_char()
|| self.find_prev_char(start).map_or(true, |c| !c.is_word_char());
if has_wildcards {
let mut chunks: Vec<String> = vec![];
let mut prev_wildcard = false;
let mut chunk_start = 0;
if word_boundary_start {
let word_boundary_end = end == self.len()
|| !self.find_prev_char(end).unwrap().is_word_char()
|| !self.char_at(end).is_word_char();
if word_boundary_end {
return true;
for (i, c) in pattern.char_indices() {
if matches!(c, '?' | '*') && !prev_wildcard {
if i != 0 {
chunks.push(regex::escape(&pattern[chunk_start..i]));
chunk_start = i;
}
prev_wildcard = true;
} else if prev_wildcard {
let chunk = &pattern[chunk_start..i];
chunks.push(chunk.wildcards_to_regex());
chunk_start = i;
prev_wildcard = false;
}
// Find next word.
let non_word_str = &self[start..];
let non_word = match non_word_str.find(|c: char| !c.is_word_char()) {
Some(pos) => pos,
None => return false,
};
let word_str = &non_word_str[non_word..];
let word = match word_str.find(|c: char| c.is_word_char()) {
Some(pos) => pos,
None => return false,
};
word_str[word..].matches_word(pattern)
}
None => false,
let len = pattern.len();
if !prev_wildcard {
chunks.push(regex::escape(&pattern[chunk_start..len]));
} else if prev_wildcard {
let chunk = &pattern[chunk_start..len];
chunks.push(chunk.wildcards_to_regex());
}
let regex = format!(
"(?:^|{WORD_BOUNDARY_CHARACTERS}){}(?:{WORD_BOUNDARY_CHARACTERS}|$)",
chunks.concat()
);
Regex::new(&regex).ok().filter(|re| re.is_match(self)).is_some()
} else {
match self.find(pattern) {
Some(start) => {
let end = start + pattern.len();
// Look if the match has word boundaries.
let word_boundary_start = !self.char_at(start).is_word_char()
|| self.find_prev_char(start).map_or(true, |c| !c.is_word_char());
if word_boundary_start {
let word_boundary_end = end == self.len()
|| !self.find_prev_char(end).unwrap().is_word_char()
|| !self.char_at(end).is_word_char();
if word_boundary_end {
return true;
}
}
// Find next word.
let non_word_str = &self[start..];
let non_word = match non_word_str.find(|c: char| !c.is_word_char()) {
Some(pos) => pos,
None => return false,
};
let word_str = &non_word_str[non_word..];
let word = match word_str.find(|c: char| c.is_word_char()) {
Some(pos) => pos,
None => return false,
};
word_str[word..].matches_word(pattern)
}
None => false,
}
}
}
fn wildcards_to_regex(&self) -> String {
    // Collapse the whole run of wildcards into one bounded repetition, which keeps the
    // generated regex cheap no matter how many wildcards the glob contains:
    // - each `?` stands for exactly one character, so the `?`s give the minimum length;
    // - a `*` stands for zero or more characters, so one `*` is enough to drop the
    //   upper bound.
    // For instance the glob `?**?**?` means the same as `???*`, i.e. the regex `.{3,}`.
    let min_chars = self.chars().filter(|&c| c == '?').count();

    // `{n,}` means "at least n", `{n}` means "exactly n".
    let open_upper_bound = if self.contains('*') { "," } else { "" };

    format!(".{{{min_chars}{open_upper_bound}}}")
}
}
@ -443,6 +513,19 @@ mod tests {
assert!("Ruma Dev👩💻".matches_word("Dev"));
assert!("Ruma Dev👩💻".matches_word("👩‍💻"));
assert!("Ruma Dev👩💻".matches_word("Dev👩💻"));
// Regex syntax is escaped
assert!(!"matrix".matches_word(r"\w*"));
assert!(r"\w".matches_word(r"\w*"));
assert!(!"matrix".matches_word("[a-z]*"));
assert!("[a-z] and [0-9]".matches_word("[a-z]*"));
assert!(!"m".matches_word("[[:alpha:]]?"));
assert!("[[:alpha:]]!".matches_word("[[:alpha:]]?"));
// From the spec: <https://spec.matrix.org/v1.3/client-server-api/#conditions-1>
assert!("An example event.".matches_word("ex*ple"));
assert!("exple".matches_word("ex*ple"));
assert!("An exciting triple-whammy".matches_word("ex*ple"));
}
#[test]
@ -451,7 +534,7 @@ mod tests {
assert!("foo bar".matches_pattern("foo", true));
assert!("Foo bar".matches_pattern("foo", true));
assert!(!"foobar".matches_pattern("foo", true));
assert!(!"foo bar".matches_pattern("foo*", true));
assert!("foo bar".matches_pattern("foo*", true));
assert!("".matches_pattern("", true));
assert!(!"foo".matches_pattern("", true));
@ -467,6 +550,12 @@ mod tests {
assert!("".matches_pattern("", false));
assert!("".matches_pattern("*", false));
assert!(!"foo".matches_pattern("", false));
// From the spec: <https://spec.matrix.org/v1.3/client-server-api/#conditions-1>
assert!("Lunch plans".matches_pattern("lunc?*", false));
assert!("LUNCH".matches_pattern("lunc?*", false));
assert!(!" lunch".matches_pattern("lunc?*", false));
assert!(!"lunc".matches_pattern("lunc?*", false));
}
#[test]