#![allow(clippy::disallowed_types)] use std::collections::{HashMap, HashSet}; pub(crate) mod clean; use crate::HtmlSanitizerMode; /// Configuration to sanitize HTML elements and attributes. #[derive(Debug, Default, Clone)] pub struct SanitizerConfig { /// The mode of the sanitizer, if any. mode: Option, /// Change to the list of elements to replace. /// /// The content is a map of element name to their replacement's element name. replace_elements: Option>>, /// Elements to remove. remove_elements: Option>, /// Whether to remove the rich reply fallback. remove_reply_fallback: bool, /// Elements to ignore. ignore_elements: Option>, /// Change to the list of elements to allow. allow_elements: Option>>, /// Change to the list of attributes to replace per element. /// /// The content is a map of element name to a map of attribute name to their replacement's /// attribute name. replace_attrs: Option>>>, /// Removed attributes per element. remove_attrs: Option>>, /// Change to the list of allowed attributes per element. allow_attrs: Option>>>, /// Denied URI schemes per attribute per element. /// /// The content is a map of element name to a map of attribute name to a set of schemes. deny_schemes: Option>>>, /// Change to the list of allowed URI schemes per attribute per element. /// /// The content is a map of element name to a map of attribute name to a set of schemes. #[allow(clippy::type_complexity)] allow_schemes: Option>>>>, /// Removed classes per element. /// /// The content is a map of element name to a set of classes. remove_classes: Option>>, /// Change to the list of allowed classes per element. /// /// The content is a map of element name to a set of classes. allow_classes: Option>>>, /// Maximum nesting level of the elements. max_depth: Option, } impl SanitizerConfig { /// Constructs an empty `SanitizerConfig` that will not filter any element or attribute. /// /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`], /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`], /// [`Self::remove_reply_fallback()`]. /// /// The list of allowed and replaced attributes can be changed with /// [`Self::allow_attributes()`], [`Self::replace_attributes()`], /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`], /// [`Self::allow_classes()`], [`Self::remove_classes()`]. pub fn new() -> Self { Self::default() } /// Constructs a `SanitizerConfig` with the given mode for filtering elements and attributes. /// /// The mode defines the basic list of allowed and replaced elements and attributes and the /// maximum nesting level of elements. /// /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`], /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`], /// [`Self::remove_reply_fallback()`]. /// /// The list of allowed and replaced attributes can be changed with /// [`Self::allow_attributes()`], [`Self::replace_attributes()`], /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`], /// [`Self::allow_classes()`], [`Self::remove_classes()`]. pub fn with_mode(mode: HtmlSanitizerMode) -> Self { Self { mode: Some(mode), ..Default::default() } } /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in /// the Matrix specification]. /// /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`], /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`], /// [`Self::remove_reply_fallback()`]. /// /// The list of allowed and replaced attributes can be changed with /// [`Self::allow_attributes()`], [`Self::replace_attributes()`], /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`], /// [`Self::allow_classes()`], [`Self::remove_classes()`]. /// /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Strict)`. /// /// [suggested in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes pub fn strict() -> Self { Self::with_mode(HtmlSanitizerMode::Strict) } /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in /// the Matrix specification], except a few for improved compatibility: /// /// * The `matrix` scheme is allowed in links. /// /// The list of allowed elements can be changed with [`Self::allow_elements()`], /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`], /// [`Self::remove_reply_fallback()`]. /// /// The list of allowed attributes can be changed with [`Self::allow_attributes()`], /// [`Self::replace_attributes()`], [`Self::remove_attributes()`], [`Self::allow_schemes()`], /// [`Self::deny_schemes()`], [`Self::allow_classes()`], [`Self::remove_classes()`]. /// /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Compat)`. /// /// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes pub fn compat() -> Self { Self::with_mode(HtmlSanitizerMode::Compat) } /// Change the list of replaced HTML elements. /// /// The given list is added to or replaces the list of replacements of the current mode, /// depending on the [`ListBehavior`]. /// /// The replacement occurs before the removal, so the replaced element should not be in /// the allowed list of elements, but the replacement element should. /// /// # Parameters /// /// * `elements`: The list of element names replacements. pub fn replace_elements( mut self, elements: impl IntoIterator, behavior: ListBehavior, ) -> Self { let content = elements.into_iter().map(|r| r.to_tuple()).collect(); self.replace_elements = Some(List { content, behavior }); self } /// Remove the given HTML elements. /// /// When an element is removed, the element and its children are dropped. If you want to remove /// an element but keep its children, use [`SanitizerConfig::ignore_elements`] or /// [`SanitizerConfig::allow_elements`]. /// /// Removing elements has a higher priority than ignoring or allowing. So if an element is in /// this list, it will always be removed. /// /// # Parameters /// /// * `elements`: The list of element names to remove. pub fn remove_elements(mut self, elements: impl IntoIterator) -> Self { self.remove_elements = Some(elements.into_iter().collect()); self } /// Remove the [rich reply] fallback. /// /// Calling this allows to remove the `mx-reply` element in addition to the list of elements to /// remove. /// /// Removing elements has a higher priority than ignoring or allowing. So if this settings is /// set, `mx-reply` will always be removed. /// /// [rich reply]: https://spec.matrix.org/latest/client-server-api/#rich-replies pub fn remove_reply_fallback(mut self) -> Self { self.remove_reply_fallback = true; self } /// Ignore the given HTML elements. /// /// When an element is ignored, the element is dropped and replaced by its children. If you want /// to drop an element and its children, use [`SanitizerConfig::remove_elements`]. /// /// Removing elements has a lower priority than removing but a higher priority than allowing. /// /// # Parameters /// /// * `elements`: The list of element names to ignore. pub fn ignore_elements(mut self, elements: impl IntoIterator) -> Self { self.ignore_elements = Some(elements.into_iter().collect()); self } /// Change the list of allowed HTML elements. /// /// The given list is added to or replaces the list of allowed elements of the current /// mode, depending on the [`ListBehavior`]. /// /// If an element is not allowed, it is ignored. If no mode is set and no elements are /// explicitly allowed, all elements are allowed. /// /// # Parameters /// /// * `elements`: The list of element names. pub fn allow_elements( mut self, elements: impl IntoIterator, behavior: ListBehavior, ) -> Self { let content = elements.into_iter().collect(); self.allow_elements = Some(List { content, behavior }); self } /// Change the list of replaced attributes per HTML element. /// /// The given list is added to or replaces the list of replacements of the current mode, /// depending on the [`ListBehavior`]. /// /// The replacement occurs before the removal, so the replaced attribute should not be in the /// list of allowed attributes, but the replacement attribute should. Attribute replacement /// occurs before element replacement, so if you want to replace an attribute on an element /// that is set to be replaced, you must use the replaced element's name, not the name of its /// replacement. /// /// # Parameters /// /// * `attrs`: The list of element's attributes replacements. pub fn replace_attributes<'a>( mut self, attrs: impl IntoIterator>, behavior: ListBehavior, ) -> Self { let content = attrs.into_iter().map(|r| r.to_tuple()).collect(); self.replace_attrs = Some(List { content, behavior }); self } /// Remove the given attributes per HTML element. /// /// Removing attributes has a higher priority than allowing. So if an attribute is in /// this list, it will always be removed. /// /// # Parameters /// /// * `attrs`: The list of attributes per element. The value of `parent` is the element name, /// and `properties` contains attribute names. pub fn remove_attributes<'a>( mut self, attrs: impl IntoIterator>, ) -> Self { self.remove_attrs = Some(attrs.into_iter().map(|a| a.to_tuple()).collect()); self } /// Change the list of allowed attributes per HTML element. /// /// The given list is added to or replaces the list of allowed attributes of the current /// mode, depending on the [`ListBehavior`]. /// /// If an attribute is not allowed, it is removed. If no mode is set and no attributes are /// explicitly allowed, all attributes are allowed. /// /// # Parameters /// /// * `attrs`: The list of attributes per element. The value of `parent` is the element name, /// and `properties` contains attribute names. pub fn allow_attributes<'a>( mut self, attrs: impl IntoIterator>, behavior: ListBehavior, ) -> Self { let content = attrs.into_iter().map(|a| a.to_tuple()).collect(); self.allow_attrs = Some(List { content, behavior }); self } /// Deny the given URI schemes per attribute per HTML element. /// /// Denying schemes has a higher priority than allowing. So if a scheme is in /// this list, it will always be denied. /// /// If a scheme is denied, its element is removed, because it is deemed that the element will /// not be usable without it URI. /// /// # Parameters /// /// * `schemes`: The list of schemes per attribute per element. pub fn deny_schemes<'a>( mut self, schemes: impl IntoIterator>, ) -> Self { self.deny_schemes = Some(schemes.into_iter().map(|s| s.to_tuple()).collect()); self } /// Change the list of allowed schemes per attribute per HTML element. /// /// The given list is added to or replaces the list of allowed schemes of the current /// mode, depending on the [`ListBehavior`]. /// /// If a scheme is not allowed, it is denied. If a scheme is denied, its element is ignored, /// because it is deemed that the element will not be usable without it URI. If no mode is set /// and no schemes are explicitly allowed, all schemes are allowed. /// /// # Parameters /// /// * `schemes`: The list of schemes per attribute per element. pub fn allow_schemes<'a>( mut self, schemes: impl IntoIterator>, behavior: ListBehavior, ) -> Self { let content = schemes.into_iter().map(|s| s.to_tuple()).collect(); self.allow_schemes = Some(List { content, behavior }); self } /// Deny the given classes per HTML element. /// /// Removing classes has a higher priority than allowing. So if a class is in /// this list, it will always be removed. /// /// If all the classes of a `class` attribute are removed, the whole attribute is removed. /// /// In the list of classes, the names must match the full class name. `*` can be used as a /// wildcard for any number of characters. So `language` will only match a class named /// `language`, and `language-*` will match any class name starting with `language-`. /// /// # Parameters /// /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and /// `properties` contains classes. pub fn remove_classes<'a>( mut self, classes: impl IntoIterator>, ) -> Self { self.remove_classes = Some(classes.into_iter().map(|c| c.to_tuple()).collect()); self } /// Change the list of allowed classes per HTML element. /// /// The given list is added, removed or replaces the list of allowed classes of the current /// mode, depending on the [`ListBehavior`]. /// /// If a class is not allowed, it is removed. If all the classes of a `class` attribute are /// removed, the whole attribute is removed. If no mode is set and no classes are explicitly /// allowed, all classes are allowed. /// /// In the list of classes, the names must match the full class name. `*` can be used as a /// wildcard for any number of characters. So `language` will only match a class named /// `language`, and `language-*` will match any class name starting with `language-`. /// /// # Parameters /// /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and /// `properties` contains classes. pub fn allow_classes<'a>( mut self, classes: impl IntoIterator>, behavior: ListBehavior, ) -> Self { let content = classes.into_iter().map(|c| c.to_tuple()).collect(); self.allow_classes = Some(List { content, behavior }); self } /// The maximum nesting level of HTML elements. /// /// This overrides the maximum depth set by the mode, if one is set. /// /// All elements that are deeper than the maximum depth will be removed. If no mode is set and /// no maximum depth is explicitly set, elements are not filtered by their nesting level. /// /// # Parameters /// /// * `depth`: The maximum nesting level allowed. pub fn max_depth(mut self, depth: u32) -> Self { self.max_depth = Some(depth); self } } /// A list with a behavior. #[derive(Debug, Clone)] struct List { /// The content of this list. content: T, /// The behavior of this list. behavior: ListBehavior, } impl List { /// Whether this is `ListBehavior::Override`. fn is_override(&self) -> bool { self.behavior == ListBehavior::Override } } /// The behavior of the setting. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[allow(clippy::exhaustive_enums)] pub enum ListBehavior { /// The list replaces the default list of the current mode, if one is set. /// /// If no mode is set, this is the full allow list. Override, /// The list is added to the default list of the current mode, if one is set. /// /// If no mode is set, this is the full allow list. Add, } /// The replacement of a name. #[derive(Debug, Clone, Copy)] #[allow(clippy::exhaustive_structs)] pub struct NameReplacement { /// The name to replace. pub old: &'static str, /// The name of the replacement. pub new: &'static str, } impl NameReplacement { fn to_tuple(self) -> (&'static str, &'static str) { (self.old, self.new) } } /// A list of properties names for a parent. #[allow(clippy::exhaustive_structs)] #[derive(Debug, Clone, Copy)] pub struct PropertiesNames<'a> { /// The name of the parent. pub parent: &'static str, /// The list of properties names. pub properties: &'a [&'static str], } impl PropertiesNames<'_> { fn to_tuple(self) -> (&'static str, HashSet<&'static str>) { let set = self.properties.iter().copied().collect(); (self.parent, set) } } /// The replacement of an element's attributes. #[allow(clippy::exhaustive_structs)] #[derive(Debug, Clone, Copy)] pub struct ElementAttributesReplacement<'a> { /// The name of the element. pub element: &'static str, /// The list of attributes replacements. pub replacements: &'a [NameReplacement], } impl ElementAttributesReplacement<'_> { fn to_tuple(self) -> (&'static str, HashMap<&'static str, &'static str>) { let map = self.replacements.iter().map(|r| r.to_tuple()).collect(); (self.element, map) } } /// An element's attributes' URI schemes. #[allow(clippy::exhaustive_structs)] #[derive(Debug, Clone, Copy)] pub struct ElementAttributesSchemes<'a> { /// The name of the element. pub element: &'static str, /// The list of allowed URI schemes per attribute name. /// /// The value of the `parent` is the attribute name and the properties are schemes. pub attr_schemes: &'a [PropertiesNames<'a>], } impl ElementAttributesSchemes<'_> { fn to_tuple(self) -> (&'static str, HashMap<&'static str, HashSet<&'static str>>) { let map = self.attr_schemes.iter().map(|s| s.to_tuple()).collect(); (self.element, map) } }