push: Allow wildcards when matching words

This commit is contained in:
Kévin Commaille 2022-06-23 15:57:54 +02:00 committed by GitHub
parent 33e1a20c4b
commit d192184b3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 126 additions and 33 deletions

View File

@ -4,6 +4,9 @@ Bug fixes:
* Expose `MatrixIdError`, `MatrixToError`, `MatrixUriError` and `MxcUriError` at * Expose `MatrixIdError`, `MatrixToError`, `MatrixUriError` and `MxcUriError` at
the crate root the crate root
* Allow wildcards for push conditions on `content.body`
* The spec clarified the behavior of the `event_match` condition:
<https://github.com/matrix-org/matrix-spec-proposals/pull/3690>
Breaking changes: Breaking changes:

View File

@ -61,6 +61,7 @@ js_option = "0.1.0"
percent-encoding = "2.1.0" percent-encoding = "2.1.0"
pulldown-cmark = { version = "0.9.1", default-features = false, optional = true } pulldown-cmark = { version = "0.9.1", default-features = false, optional = true }
rand_crate = { package = "rand", version = "0.8.3", optional = true } rand_crate = { package = "rand", version = "0.8.3", optional = true }
regex = { version = "1.5.6", default-features = false, features = ["std", "perf"] }
ruma-identifiers-validation = { version = "0.8.1", path = "../ruma-identifiers-validation", default-features = false } ruma-identifiers-validation = { version = "0.8.1", path = "../ruma-identifiers-validation", default-features = false }
ruma-macros = { version = "0.9.2", path = "../ruma-macros" } ruma-macros = { version = "0.9.2", path = "../ruma-macros" }
serde = { version = "1.0.118", features = ["derive"] } serde = { version = "1.0.118", features = ["derive"] }

View File

@ -1,6 +1,7 @@
use std::{collections::BTreeMap, ops::RangeBounds, str::FromStr}; use std::{collections::BTreeMap, ops::RangeBounds, str::FromStr};
use js_int::{Int, UInt}; use js_int::{Int, UInt};
use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{to_value as to_json_value, value::Value as JsonValue}; use serde_json::{to_value as to_json_value, value::Value as JsonValue};
use tracing::{instrument, warn}; use tracing::{instrument, warn};
@ -12,6 +13,13 @@ mod room_member_count_is;
pub use room_member_count_is::{ComparisonOperator, RoomMemberCountIs}; pub use room_member_count_is::{ComparisonOperator, RoomMemberCountIs};
/// Regex character class matching the characters that are defined as a word boundary in the
/// [Matrix spec].
///
/// Matches any single character not in the sets `[A-Z]`, `[a-z]`, `[0-9]` or `_`.
///
/// This is a fragment of regex syntax meant to be interpolated into a larger pattern. The start
/// and the end of the value are word boundaries too, but they are not covered by this class and
/// must be handled with the `^` and `$` anchors where the fragment is used.
///
/// [Matrix spec]: https://spec.matrix.org/v1.3/client-server-api/#conditions-1
const WORD_BOUNDARY_CHARACTERS: &str = "[^A-Za-z0-9_]";
/// A condition that must apply for an associated push rule's action to be taken. /// A condition that must apply for an associated push rule's action to be taken.
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
#[cfg_attr(not(feature = "unstable-exhaustive-types"), non_exhaustive)] #[cfg_attr(not(feature = "unstable-exhaustive-types"), non_exhaustive)]
@ -169,17 +177,27 @@ trait StrExt {
/// Matches this string against `pattern`. /// Matches this string against `pattern`.
/// ///
/// The pattern can be a glob with wildcards `*` and `?`.
///
/// The match is case insensitive. /// The match is case insensitive.
/// ///
/// If `match_words` is `true`, looks for `pattern` as a substring of `self`, /// If `match_words` is `true`, checks that the pattern is separated from other words.
/// and checks that it is separated from other words. Otherwise, checks
/// `pattern` as a glob with wildcards `*` and `?`.
fn matches_pattern(&self, pattern: &str, match_words: bool) -> bool; fn matches_pattern(&self, pattern: &str, match_words: bool) -> bool;
/// Matches this string against `pattern`, with word boundaries. /// Matches this string against `pattern`, with word boundaries.
/// ///
/// The pattern can be a glob with wildcards `*` and `?`.
///
/// A word boundary is defined as the start or end of the value, or any character not in the
/// sets `[A-Z]`, `[a-z]`, `[0-9]` or `_`.
///
/// The match is case sensitive. /// The match is case sensitive.
fn matches_word(&self, pattern: &str) -> bool; fn matches_word(&self, pattern: &str) -> bool;
/// Translates a run of glob wildcards into the equivalent regex syntax.
///
/// `self` must only contain the wildcards `*` and `?`. Each `?` stands for exactly one
/// character and `*` for any number of characters, so the run is collapsed into a single
/// repetition of `.`: `.{n}` when there is no `*`, and `.{n,}` otherwise, where `n` is the
/// number of `?` in `self`.
///
/// Characters other than `*` and `?` are not escaped nor copied to the output, they are
/// ignored.
fn wildcards_to_regex(&self) -> String;
} }
impl StrExt for str { impl StrExt for str {
@ -229,6 +247,44 @@ impl StrExt for str {
return false; return false;
} }
let has_wildcards = pattern.contains(|c| matches!(c, '?' | '*'));
if has_wildcards {
let mut chunks: Vec<String> = vec![];
let mut prev_wildcard = false;
let mut chunk_start = 0;
for (i, c) in pattern.char_indices() {
if matches!(c, '?' | '*') && !prev_wildcard {
if i != 0 {
chunks.push(regex::escape(&pattern[chunk_start..i]));
chunk_start = i;
}
prev_wildcard = true;
} else if prev_wildcard {
let chunk = &pattern[chunk_start..i];
chunks.push(chunk.wildcards_to_regex());
chunk_start = i;
prev_wildcard = false;
}
}
let len = pattern.len();
if !prev_wildcard {
chunks.push(regex::escape(&pattern[chunk_start..len]));
} else if prev_wildcard {
let chunk = &pattern[chunk_start..len];
chunks.push(chunk.wildcards_to_regex());
}
let regex = format!(
"(?:^|{WORD_BOUNDARY_CHARACTERS}){}(?:{WORD_BOUNDARY_CHARACTERS}|$)",
chunks.concat()
);
Regex::new(&regex).ok().filter(|re| re.is_match(self)).is_some()
} else {
match self.find(pattern) { match self.find(pattern) {
Some(start) => { Some(start) => {
let end = start + pattern.len(); let end = start + pattern.len();
@ -267,6 +323,20 @@ impl StrExt for str {
} }
} }
/// Converts a run of glob wildcards into a single regex repetition of `.`.
///
/// Only the `?` and `*` characters of `self` are taken into account.
fn wildcards_to_regex(&self) -> String {
    // Collapse the whole run into one bounded repetition to avoid performance issues:
    // - Each `?` consumes exactly one character, so the number of `?` is the minimum length.
    // - Any number of `*` only removes the upper bound, so the glob `?**?**?` is equivalent
    //   to the glob `???*`, which is the regex `.{3,}`.
    let min_len = self.chars().filter(|&c| c == '?').count();
    let upper_bound = if self.chars().any(|c| c == '*') { "," } else { "" };

    format!(".{{{min_len}{upper_bound}}}")
}
}
/// The flattened representation of a JSON object. /// The flattened representation of a JSON object.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct FlattenedJson { pub struct FlattenedJson {
@ -443,6 +513,19 @@ mod tests {
assert!("Ruma Dev👩💻".matches_word("Dev")); assert!("Ruma Dev👩💻".matches_word("Dev"));
assert!("Ruma Dev👩💻".matches_word("👩‍💻")); assert!("Ruma Dev👩💻".matches_word("👩‍💻"));
assert!("Ruma Dev👩💻".matches_word("Dev👩💻")); assert!("Ruma Dev👩💻".matches_word("Dev👩💻"));
// Regex syntax is escaped
assert!(!"matrix".matches_word(r"\w*"));
assert!(r"\w".matches_word(r"\w*"));
assert!(!"matrix".matches_word("[a-z]*"));
assert!("[a-z] and [0-9]".matches_word("[a-z]*"));
assert!(!"m".matches_word("[[:alpha:]]?"));
assert!("[[:alpha:]]!".matches_word("[[:alpha:]]?"));
// From the spec: <https://spec.matrix.org/v1.3/client-server-api/#conditions-1>
assert!("An example event.".matches_word("ex*ple"));
assert!("exple".matches_word("ex*ple"));
assert!("An exciting triple-whammy".matches_word("ex*ple"));
} }
#[test] #[test]
@ -451,7 +534,7 @@ mod tests {
assert!("foo bar".matches_pattern("foo", true)); assert!("foo bar".matches_pattern("foo", true));
assert!("Foo bar".matches_pattern("foo", true)); assert!("Foo bar".matches_pattern("foo", true));
assert!(!"foobar".matches_pattern("foo", true)); assert!(!"foobar".matches_pattern("foo", true));
assert!(!"foo bar".matches_pattern("foo*", true)); assert!("foo bar".matches_pattern("foo*", true));
assert!("".matches_pattern("", true)); assert!("".matches_pattern("", true));
assert!(!"foo".matches_pattern("", true)); assert!(!"foo".matches_pattern("", true));
@ -467,6 +550,12 @@ mod tests {
assert!("".matches_pattern("", false)); assert!("".matches_pattern("", false));
assert!("".matches_pattern("*", false)); assert!("".matches_pattern("*", false));
assert!(!"foo".matches_pattern("", false)); assert!(!"foo".matches_pattern("", false));
// From the spec: <https://spec.matrix.org/v1.3/client-server-api/#conditions-1>
assert!("Lunch plans".matches_pattern("lunc?*", false));
assert!("LUNCH".matches_pattern("lunc?*", false));
assert!(!" lunch".matches_pattern("lunc?*", false));
assert!(!"lunc".matches_pattern("lunc?*", false));
} }
#[test] #[test]