Merge remote-tracking branch 'upstream/main' into conduwuit-changes
This commit is contained in:
commit
e6e8462c05
@ -4,12 +4,16 @@ Breaking Changes:
|
|||||||
|
|
||||||
- Do not export `Node` in the public API, it is not usable on its own and it is
|
- Do not export `Node` in the public API, it is not usable on its own and it is
|
||||||
not in the output of any public method.
|
not in the output of any public method.
|
||||||
|
- `Html::sanitize_with` now takes a reference to `SanitizerConfig`.
|
||||||
|
|
||||||
Improvements:
|
Improvements:
|
||||||
|
|
||||||
- Add support for deprecated HTML tags, according to Matrix 1.10
|
- Add support for deprecated HTML tags, according to Matrix 1.10
|
||||||
- Allow to navigate through the HTML tree with `Html::first_child()`,
|
- Allow to navigate through the HTML tree with `Html::first_child()`,
|
||||||
`Html::last_child()` or `Html::children()`
|
`Html::last_child()` or `Html::children()`
|
||||||
|
- Add `ElementData::to_matrix` to convert it to a type using enums for HTML
|
||||||
|
elements and attributes suggested by the Matrix Specification, behind the
|
||||||
|
`matrix` cargo feature.
|
||||||
|
|
||||||
# 0.1.0
|
# 0.1.0
|
||||||
|
|
||||||
|
@ -14,9 +14,16 @@ rust-version = { workspace = true }
|
|||||||
all-features = true
|
all-features = true
|
||||||
rustdoc-args = ["--cfg", "docsrs"]
|
rustdoc-args = ["--cfg", "docsrs"]
|
||||||
|
|
||||||
|
[features]
|
||||||
|
matrix = ["dep:ruma-common"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
as_variant = { workspace = true }
|
as_variant = { workspace = true }
|
||||||
html5ever = "0.27.0"
|
html5ever = "0.27.0"
|
||||||
phf = { version = "0.11.1", features = ["macros"] }
|
phf = { version = "0.11.1", features = ["macros"] }
|
||||||
|
ruma-common = { workspace = true, optional = true }
|
||||||
tracing = { workspace = true, features = ["attributes"] }
|
tracing = { workspace = true, features = ["attributes"] }
|
||||||
wildmatch = "2.0.0"
|
wildmatch = "2.0.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
assert_matches2 = { workspace = true }
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
//! Convenience methods and types to sanitize HTML messages.
|
//! Convenience methods and types to sanitize HTML messages.
|
||||||
|
|
||||||
use crate::{Html, SanitizerConfig};
|
use crate::{Html, HtmlSanitizerMode, SanitizerConfig};
|
||||||
|
|
||||||
/// Sanitize the given HTML string.
|
/// Sanitize the given HTML string.
|
||||||
///
|
///
|
||||||
@ -24,21 +24,7 @@ pub fn sanitize_html(
|
|||||||
config = config.remove_reply_fallback();
|
config = config.remove_reply_fallback();
|
||||||
}
|
}
|
||||||
|
|
||||||
sanitize_inner(s, config)
|
sanitize_inner(s, &config)
|
||||||
}
|
|
||||||
|
|
||||||
/// What HTML [tags and attributes] should be kept by the sanitizer.
|
|
||||||
///
|
|
||||||
/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
#[allow(clippy::exhaustive_enums)]
|
|
||||||
pub enum HtmlSanitizerMode {
|
|
||||||
/// Keep only the tags and attributes listed in the Matrix specification.
|
|
||||||
Strict,
|
|
||||||
|
|
||||||
/// Like `Strict` mode, with additional tags and attributes that are not yet included in
|
|
||||||
/// the spec, but are reasonable to keep.
|
|
||||||
Compat,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether to remove the [rich reply fallback] while sanitizing.
|
/// Whether to remove the [rich reply fallback] while sanitizing.
|
||||||
@ -62,10 +48,10 @@ pub enum RemoveReplyFallback {
|
|||||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||||
pub fn remove_html_reply_fallback(s: &str) -> String {
|
pub fn remove_html_reply_fallback(s: &str) -> String {
|
||||||
let config = SanitizerConfig::new().remove_reply_fallback();
|
let config = SanitizerConfig::new().remove_reply_fallback();
|
||||||
sanitize_inner(s, config)
|
sanitize_inner(s, &config)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sanitize_inner(s: &str, config: SanitizerConfig) -> String {
|
fn sanitize_inner(s: &str, config: &SanitizerConfig) -> String {
|
||||||
let mut html = Html::parse(s);
|
let mut html = Html::parse(s);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(config);
|
||||||
html.to_string()
|
html.to_string()
|
||||||
|
@ -10,6 +10,9 @@ use html5ever::{
|
|||||||
};
|
};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
|
#[cfg(feature = "matrix")]
|
||||||
|
pub mod matrix;
|
||||||
|
|
||||||
use crate::SanitizerConfig;
|
use crate::SanitizerConfig;
|
||||||
|
|
||||||
/// An HTML fragment.
|
/// An HTML fragment.
|
||||||
@ -44,11 +47,11 @@ impl Html {
|
|||||||
/// `SanitizerConfig::compat().remove_reply_fallback()`.
|
/// `SanitizerConfig::compat().remove_reply_fallback()`.
|
||||||
pub fn sanitize(&mut self) {
|
pub fn sanitize(&mut self) {
|
||||||
let config = SanitizerConfig::compat().remove_reply_fallback();
|
let config = SanitizerConfig::compat().remove_reply_fallback();
|
||||||
self.sanitize_with(config);
|
self.sanitize_with(&config);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sanitize this HTML according to the given configuration.
|
/// Sanitize this HTML according to the given configuration.
|
||||||
pub fn sanitize_with(&mut self, config: SanitizerConfig) {
|
pub fn sanitize_with(&mut self, config: &SanitizerConfig) {
|
||||||
config.clean(self);
|
config.clean(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -431,6 +434,16 @@ pub struct ElementData {
|
|||||||
pub attrs: BTreeSet<Attribute>,
|
pub attrs: BTreeSet<Attribute>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ElementData {
|
||||||
|
/// Convert this element data to typed data as [suggested by the Matrix Specification][spec].
|
||||||
|
///
|
||||||
|
/// [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
#[cfg(feature = "matrix")]
|
||||||
|
pub fn to_matrix(&self) -> matrix::MatrixElementData {
|
||||||
|
matrix::MatrixElementData::parse(&self.name, &self.attrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A reference to an HTML node.
|
/// A reference to an HTML node.
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
|
724
crates/ruma-html/src/html/matrix.rs
Normal file
724
crates/ruma-html/src/html/matrix.rs
Normal file
@ -0,0 +1,724 @@
|
|||||||
|
//! Types to work with HTML elements and attributes [suggested by the Matrix Specification][spec].
|
||||||
|
//!
|
||||||
|
//! [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
|
||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
|
use html5ever::{namespace_url, ns, tendril::StrTendril, Attribute, QualName};
|
||||||
|
use ruma_common::{
|
||||||
|
IdParseError, MatrixToError, MatrixToUri, MatrixUri, MatrixUriError, MxcUri, OwnedMxcUri,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::sanitizer_config::clean::{
|
||||||
|
ALLOWED_SCHEMES_A_HREF_COMPAT, ALLOWED_SCHEMES_A_HREF_STRICT,
|
||||||
|
};
|
||||||
|
|
||||||
|
const CLASS_LANGUAGE_PREFIX: &str = "language-";
|
||||||
|
|
||||||
|
/// The data of a Matrix HTML element.
|
||||||
|
///
|
||||||
|
/// This is a helper type to work with elements [suggested by the Matrix Specification][spec].
|
||||||
|
///
|
||||||
|
/// This performs a lossless conversion from [`ElementData`]. Unsupported elements are represented
|
||||||
|
/// by [`MatrixElement::Other`] and unsupported attributes are listed in the `attrs` field.
|
||||||
|
///
|
||||||
|
/// [`ElementData`]: crate::ElementData
|
||||||
|
/// [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[allow(clippy::exhaustive_structs)]
|
||||||
|
pub struct MatrixElementData {
|
||||||
|
/// The HTML element and its supported data.
|
||||||
|
pub element: MatrixElement,
|
||||||
|
|
||||||
|
/// The unsupported attributes found on the element.
|
||||||
|
pub attrs: BTreeSet<Attribute>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MatrixElementData {
|
||||||
|
/// Parse a `MatrixElementData` from the given qualified name and attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
pub(super) fn parse(name: &QualName, attrs: &BTreeSet<Attribute>) -> Self {
|
||||||
|
let (element, attrs) = MatrixElement::parse(name, attrs);
|
||||||
|
Self { element, attrs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A Matrix HTML element.
|
||||||
|
///
|
||||||
|
/// All the elements [suggested by the Matrix Specification][spec] have a variant. The others are
|
||||||
|
/// handled by the fallback `Other` variant.
|
||||||
|
///
|
||||||
|
/// Suggested attributes are represented as optional fields on the variants structs.
|
||||||
|
///
|
||||||
|
/// [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub enum MatrixElement {
|
||||||
|
/// [`<del>`], a deleted text element.
|
||||||
|
///
|
||||||
|
/// [`<del>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/del
|
||||||
|
Del,
|
||||||
|
|
||||||
|
/// [`<h1>-<h6>`], a section heading element.
|
||||||
|
///
|
||||||
|
/// [`<h1>-<h6>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Heading_Elements
|
||||||
|
H(HeadingData),
|
||||||
|
|
||||||
|
/// [`<blockquote>`], a block quotation element.
|
||||||
|
///
|
||||||
|
/// [`<blockquote>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/blockquote
|
||||||
|
Blockquote,
|
||||||
|
|
||||||
|
/// [`<p>`], a paragraph element.
|
||||||
|
///
|
||||||
|
/// [`<p>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/p
|
||||||
|
P,
|
||||||
|
|
||||||
|
/// [`<a>`], an anchor element.
|
||||||
|
///
|
||||||
|
/// [`<a>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a
|
||||||
|
A(AnchorData),
|
||||||
|
|
||||||
|
/// [`<ul>`], an unordered list element.
|
||||||
|
///
|
||||||
|
/// [`<ul>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ul
|
||||||
|
Ul,
|
||||||
|
|
||||||
|
/// [`<ol>`], an ordered list element.
|
||||||
|
///
|
||||||
|
/// [`<ol>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol
|
||||||
|
Ol(OrderedListData),
|
||||||
|
|
||||||
|
/// [`<sup>`], a superscript element.
|
||||||
|
///
|
||||||
|
/// [`<sup>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/sup
|
||||||
|
Sup,
|
||||||
|
|
||||||
|
/// [`<sub>`], a subscript element.
|
||||||
|
///
|
||||||
|
/// [`<sub>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/sub
|
||||||
|
Sub,
|
||||||
|
|
||||||
|
/// [`<li>`], a list item element.
|
||||||
|
///
|
||||||
|
/// [`<li>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/li
|
||||||
|
Li,
|
||||||
|
|
||||||
|
/// [`<b>`], a bring attention to element.
|
||||||
|
///
|
||||||
|
/// [`<b>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/b
|
||||||
|
B,
|
||||||
|
|
||||||
|
/// [`<i>`], an idiomatic text element.
|
||||||
|
///
|
||||||
|
/// [`<i>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/i
|
||||||
|
I,
|
||||||
|
|
||||||
|
/// [`<u>`], an unarticulated annotation element.
|
||||||
|
///
|
||||||
|
/// [`<u>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/u
|
||||||
|
U,
|
||||||
|
|
||||||
|
/// [`<strong>`], a strong importance element.
|
||||||
|
///
|
||||||
|
/// [`<strong>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/strong
|
||||||
|
Strong,
|
||||||
|
|
||||||
|
/// [`<em>`], an emphasis element.
|
||||||
|
///
|
||||||
|
/// [`<em>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/em
|
||||||
|
Em,
|
||||||
|
|
||||||
|
/// [`<s>`], a strikethrough element.
|
||||||
|
///
|
||||||
|
/// [`<s>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/s
|
||||||
|
S,
|
||||||
|
|
||||||
|
/// [`<code>`], an inline code element.
|
||||||
|
///
|
||||||
|
/// [`<code>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/code
|
||||||
|
Code(CodeData),
|
||||||
|
|
||||||
|
/// [`<hr>`], a thematic break element.
|
||||||
|
///
|
||||||
|
/// [`<hr>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/hr
|
||||||
|
Hr,
|
||||||
|
|
||||||
|
/// [`<br>`], a line break element.
|
||||||
|
///
|
||||||
|
/// [`<br>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/br
|
||||||
|
Br,
|
||||||
|
|
||||||
|
/// [`<div>`], a content division element.
|
||||||
|
///
|
||||||
|
/// [`<div>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/div
|
||||||
|
Div,
|
||||||
|
|
||||||
|
/// [`<table>`], a table element.
|
||||||
|
///
|
||||||
|
/// [`<table>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/table
|
||||||
|
Table,
|
||||||
|
|
||||||
|
/// [`<thead>`], a table head element.
|
||||||
|
///
|
||||||
|
/// [`<thead>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/thead
|
||||||
|
Thead,
|
||||||
|
|
||||||
|
/// [`<tbody>`], a table body element.
|
||||||
|
///
|
||||||
|
/// [`<tbody>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/tbody
|
||||||
|
Tbody,
|
||||||
|
|
||||||
|
/// [`<tr>`], a table row element.
|
||||||
|
///
|
||||||
|
/// [`<tr>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/tr
|
||||||
|
Tr,
|
||||||
|
|
||||||
|
/// [`<th>`], a table header element.
|
||||||
|
///
|
||||||
|
/// [`<th>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/th
|
||||||
|
Th,
|
||||||
|
|
||||||
|
/// [`<td>`], a table data cell element.
|
||||||
|
///
|
||||||
|
/// [`<td>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td
|
||||||
|
Td,
|
||||||
|
|
||||||
|
/// [`<caption>`], a table caption element.
|
||||||
|
///
|
||||||
|
/// [`<caption>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/caption
|
||||||
|
Caption,
|
||||||
|
|
||||||
|
/// [`<pre>`], a preformatted text element.
|
||||||
|
///
|
||||||
|
/// [`<pre>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/pre
|
||||||
|
Pre,
|
||||||
|
|
||||||
|
/// [`<span>`], a content span element.
|
||||||
|
///
|
||||||
|
/// [`<span>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/span
|
||||||
|
Span(SpanData),
|
||||||
|
|
||||||
|
/// [`<img>`], an image embed element.
|
||||||
|
///
|
||||||
|
/// [`<img>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img
|
||||||
|
Img(ImageData),
|
||||||
|
|
||||||
|
/// [`<details>`], a details disclosure element.
|
||||||
|
///
|
||||||
|
/// [`<details>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/details
|
||||||
|
Details,
|
||||||
|
|
||||||
|
/// [`<summary>`], a disclosure summary element.
|
||||||
|
///
|
||||||
|
/// [`<summary>`]: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/summary
|
||||||
|
Summary,
|
||||||
|
|
||||||
|
/// [`mx-reply`], a Matrix rich reply fallback element.
|
||||||
|
///
|
||||||
|
/// [`mx-reply`]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||||
|
MatrixReply,
|
||||||
|
|
||||||
|
/// An HTML element that is not in the suggested list.
|
||||||
|
Other(QualName),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MatrixElement {
|
||||||
|
/// Parse a `MatrixElement` from the given qualified name and attributes.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed `Element` and the list of remaining unsupported
|
||||||
|
/// attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(name: &QualName, attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
if name.ns != ns!(html) {
|
||||||
|
return (Self::Other(name.clone()), attrs.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
match name.local.as_bytes() {
|
||||||
|
b"del" => (Self::Del, attrs.clone()),
|
||||||
|
b"h1" => (Self::H(HeadingData::new(1)), attrs.clone()),
|
||||||
|
b"h2" => (Self::H(HeadingData::new(2)), attrs.clone()),
|
||||||
|
b"h3" => (Self::H(HeadingData::new(3)), attrs.clone()),
|
||||||
|
b"h4" => (Self::H(HeadingData::new(4)), attrs.clone()),
|
||||||
|
b"h5" => (Self::H(HeadingData::new(5)), attrs.clone()),
|
||||||
|
b"h6" => (Self::H(HeadingData::new(6)), attrs.clone()),
|
||||||
|
b"blockquote" => (Self::Blockquote, attrs.clone()),
|
||||||
|
b"p" => (Self::P, attrs.clone()),
|
||||||
|
b"a" => {
|
||||||
|
let (data, attrs) = AnchorData::parse(attrs);
|
||||||
|
(Self::A(data), attrs)
|
||||||
|
}
|
||||||
|
b"ul" => (Self::Ul, attrs.clone()),
|
||||||
|
b"ol" => {
|
||||||
|
let (data, attrs) = OrderedListData::parse(attrs);
|
||||||
|
(Self::Ol(data), attrs)
|
||||||
|
}
|
||||||
|
b"sup" => (Self::Sup, attrs.clone()),
|
||||||
|
b"sub" => (Self::Sub, attrs.clone()),
|
||||||
|
b"li" => (Self::Li, attrs.clone()),
|
||||||
|
b"b" => (Self::B, attrs.clone()),
|
||||||
|
b"i" => (Self::I, attrs.clone()),
|
||||||
|
b"u" => (Self::U, attrs.clone()),
|
||||||
|
b"strong" => (Self::Strong, attrs.clone()),
|
||||||
|
b"em" => (Self::Em, attrs.clone()),
|
||||||
|
b"s" => (Self::S, attrs.clone()),
|
||||||
|
b"code" => {
|
||||||
|
let (data, attrs) = CodeData::parse(attrs);
|
||||||
|
(Self::Code(data), attrs)
|
||||||
|
}
|
||||||
|
b"hr" => (Self::Hr, attrs.clone()),
|
||||||
|
b"br" => (Self::Br, attrs.clone()),
|
||||||
|
b"div" => (Self::Div, attrs.clone()),
|
||||||
|
b"table" => (Self::Table, attrs.clone()),
|
||||||
|
b"thead" => (Self::Thead, attrs.clone()),
|
||||||
|
b"tbody" => (Self::Tbody, attrs.clone()),
|
||||||
|
b"tr" => (Self::Tr, attrs.clone()),
|
||||||
|
b"th" => (Self::Th, attrs.clone()),
|
||||||
|
b"td" => (Self::Td, attrs.clone()),
|
||||||
|
b"caption" => (Self::Caption, attrs.clone()),
|
||||||
|
b"pre" => (Self::Pre, attrs.clone()),
|
||||||
|
b"span" => {
|
||||||
|
let (data, attrs) = SpanData::parse(attrs);
|
||||||
|
(Self::Span(data), attrs)
|
||||||
|
}
|
||||||
|
b"img" => {
|
||||||
|
let (data, attrs) = ImageData::parse(attrs);
|
||||||
|
(Self::Img(data), attrs)
|
||||||
|
}
|
||||||
|
b"details" => (Self::Details, attrs.clone()),
|
||||||
|
b"summary" => (Self::Summary, attrs.clone()),
|
||||||
|
b"mx-reply" => (Self::MatrixReply, attrs.clone()),
|
||||||
|
_ => (Self::Other(name.clone()), attrs.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The supported data of a `<h1>-<h6>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct HeadingData {
|
||||||
|
/// The level of the heading.
|
||||||
|
pub level: HeadingLevel,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HeadingData {
|
||||||
|
/// Constructs a new `HeadingData` with the given heading level.
|
||||||
|
fn new(level: u8) -> Self {
|
||||||
|
Self { level: HeadingLevel(level) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The level of a heading element.
///
/// Supported levels go from 1 (highest) to 6 (lowest). The inner value is private, so other
/// levels cannot construct this type and do not use the [`MatrixElement::H`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HeadingLevel(u8);

impl HeadingLevel {
    /// The numeric value of the level.
    ///
    /// Can only be a value between 1 and 6 included.
    pub fn value(&self) -> u8 {
        let Self(level) = *self;
        level
    }
}

/// Allows comparing a level directly with a plain integer, e.g. `level == 2`.
impl PartialEq<u8> for HeadingLevel {
    fn eq(&self, other: &u8) -> bool {
        self.value() == *other
    }
}
|
||||||
|
|
||||||
|
/// The supported data of a `<a>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct AnchorData {
|
||||||
|
/// The name of the anchor.
|
||||||
|
pub name: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// Where to display the linked URL.
|
||||||
|
pub target: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// The URL that the hyperlink points to.
|
||||||
|
pub href: Option<AnchorUri>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AnchorData {
|
||||||
|
/// Construct an empty `AnchorData`.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { name: None, target: None, href: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the given attributes to construct a new `AnchorData`.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed data and the remaining unsupported attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
let mut data = Self::new();
|
||||||
|
let mut remaining_attrs = BTreeSet::new();
|
||||||
|
|
||||||
|
for attr in attrs {
|
||||||
|
if attr.name.ns != ns!() {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attr.name.local.as_bytes() {
|
||||||
|
b"name" => {
|
||||||
|
data.name = Some(attr.value.clone());
|
||||||
|
}
|
||||||
|
b"target" => {
|
||||||
|
data.target = Some(attr.value.clone());
|
||||||
|
}
|
||||||
|
b"href" => {
|
||||||
|
if let Some(uri) = AnchorUri::parse(&attr.value) {
|
||||||
|
data.href = Some(uri);
|
||||||
|
} else {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(data, remaining_attrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A URI as a value for the `href` attribute of a `<a>` HTML element.
|
||||||
|
///
|
||||||
|
/// This is a helper type that recognizes `matrix:` and `https://matrix.to` URIs to detect mentions.
|
||||||
|
///
|
||||||
|
/// If the URI is an invalid Matrix URI or does not use one of the suggested schemes, the `href`
|
||||||
|
/// attribute will be in the `attrs` list of [`MatrixElementData`].
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub enum AnchorUri {
|
||||||
|
/// A `matrix:` URI.
|
||||||
|
Matrix(MatrixUri),
|
||||||
|
/// A `https://matrix.to` URI.
|
||||||
|
MatrixTo(MatrixToUri),
|
||||||
|
/// Another URL using one of the suggested schemes.
|
||||||
|
///
|
||||||
|
/// Those schemes are:
|
||||||
|
///
|
||||||
|
/// * `https`
|
||||||
|
/// * `http`
|
||||||
|
/// * `ftp`
|
||||||
|
/// * `mailto`
|
||||||
|
/// * `magnet`
|
||||||
|
Other(StrTendril),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AnchorUri {
|
||||||
|
/// Parse the given string to construct a new `AnchorUri`.
|
||||||
|
fn parse(value: &StrTendril) -> Option<Self> {
|
||||||
|
let s = value.as_ref();
|
||||||
|
|
||||||
|
// Check if it starts with a supported scheme.
|
||||||
|
let mut allowed_schemes =
|
||||||
|
ALLOWED_SCHEMES_A_HREF_STRICT.iter().chain(ALLOWED_SCHEMES_A_HREF_COMPAT.iter());
|
||||||
|
if !allowed_schemes.any(|scheme| s.starts_with(&format!("{scheme}:"))) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
match MatrixUri::parse(s) {
|
||||||
|
Ok(uri) => return Some(Self::Matrix(uri)),
|
||||||
|
// It's not a `matrix:` URI, continue.
|
||||||
|
Err(IdParseError::InvalidMatrixUri(MatrixUriError::WrongScheme)) => {}
|
||||||
|
// The URI is invalid.
|
||||||
|
_ => return None,
|
||||||
|
}
|
||||||
|
|
||||||
|
match MatrixToUri::parse(s) {
|
||||||
|
Ok(uri) => return Some(Self::MatrixTo(uri)),
|
||||||
|
// It's not a `https://matrix.to` URI, continue.
|
||||||
|
Err(IdParseError::InvalidMatrixToUri(MatrixToError::WrongBaseUrl)) => {}
|
||||||
|
// The URI is invalid.
|
||||||
|
_ => return None,
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Self::Other(value.clone()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The supported data of a `<ol>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct OrderedListData {
|
||||||
|
/// An integer to start counting from for the list items.
|
||||||
|
///
|
||||||
|
/// If parsing the integer from a string fails, the attribute will be in the `attrs` list of
|
||||||
|
/// [`MatrixElementData`].
|
||||||
|
pub start: Option<i64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OrderedListData {
|
||||||
|
/// Construct an empty `OrderedListData`.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { start: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the given attributes to construct a new `OrderedListData`.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed data and the remaining unsupported attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
let mut data = Self::new();
|
||||||
|
let mut remaining_attrs = BTreeSet::new();
|
||||||
|
|
||||||
|
for attr in attrs {
|
||||||
|
if attr.name.ns != ns!() {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attr.name.local.as_bytes() {
|
||||||
|
b"start" => {
|
||||||
|
if let Ok(start) = attr.value.parse() {
|
||||||
|
data.start = Some(start);
|
||||||
|
} else {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(data, remaining_attrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The supported data of a `<code>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct CodeData {
|
||||||
|
/// The language of the code, for syntax highlighting.
|
||||||
|
///
|
||||||
|
/// This corresponds to the `class` attribute with a value that starts with the
|
||||||
|
/// `language-` prefix. The prefix is stripped from the value.
|
||||||
|
///
|
||||||
|
/// If there are other classes in the `class` attribute, the whole attribute will be in the
|
||||||
|
/// `attrs` list of [`MatrixElementData`].
|
||||||
|
pub language: Option<StrTendril>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CodeData {
|
||||||
|
/// Construct an empty `CodeData`.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { language: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the given attributes to construct a new `CodeData`.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed data and the remaining unsupported attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
let mut data = Self::new();
|
||||||
|
let mut remaining_attrs = BTreeSet::new();
|
||||||
|
|
||||||
|
for attr in attrs {
|
||||||
|
if attr.name.ns != ns!() {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attr.name.local.as_bytes() {
|
||||||
|
b"class" => {
|
||||||
|
let value_str = attr.value.as_ref();
|
||||||
|
|
||||||
|
// The attribute could contain several classes separated by spaces, so let's
|
||||||
|
// find the first class starting with `language-`.
|
||||||
|
for (match_start, _) in value_str.match_indices(CLASS_LANGUAGE_PREFIX) {
|
||||||
|
// The class name must either be at the start of the string or preceded by a
|
||||||
|
// space.
|
||||||
|
if match_start != 0
|
||||||
|
&& !value_str.as_bytes()[match_start - 1].is_ascii_whitespace()
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let language_start = match_start + CLASS_LANGUAGE_PREFIX.len();
|
||||||
|
|
||||||
|
let str_end = &value_str[language_start..];
|
||||||
|
let language_end = str_end
|
||||||
|
.find(|c: char| c.is_ascii_whitespace())
|
||||||
|
.map(|pos| language_start + pos)
|
||||||
|
.unwrap_or(value_str.len());
|
||||||
|
|
||||||
|
if language_end == language_start {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let sub_len = (language_end - language_start) as u32;
|
||||||
|
data.language = Some(attr.value.subtendril(language_start as u32, sub_len));
|
||||||
|
|
||||||
|
if match_start != 0 || language_end != value_str.len() {
|
||||||
|
// There are other classes, keep the whole attribute for the conversion
|
||||||
|
// to be lossless.
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if data.language.is_none() {
|
||||||
|
// We didn't find the class we want, keep the whole attribute.
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(data, remaining_attrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The supported data of a `<span>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct SpanData {
|
||||||
|
/// `data-mx-bg-color`, the background color of the text.
|
||||||
|
pub bg_color: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// `data-mx-color`, the foreground color of the text.
|
||||||
|
pub color: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// `data-mx-spoiler`, a Matrix [spoiler message].
|
||||||
|
///
|
||||||
|
/// The value is the reason of the spoiler. If the string is empty, this is a spoiler
|
||||||
|
/// without a reason.
|
||||||
|
///
|
||||||
|
/// [spoiler message]: https://spec.matrix.org/latest/client-server-api/#spoiler-messages
|
||||||
|
pub spoiler: Option<StrTendril>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SpanData {
|
||||||
|
/// Construct an empty `SpanData`.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { bg_color: None, color: None, spoiler: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the given attributes to construct a new `SpanData`.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed data and the remaining unsupported attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
let mut data = Self::new();
|
||||||
|
let mut remaining_attrs = BTreeSet::new();
|
||||||
|
|
||||||
|
for attr in attrs {
|
||||||
|
if attr.name.ns != ns!() {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attr.name.local.as_bytes() {
|
||||||
|
b"data-mx-bg-color" => {
|
||||||
|
data.bg_color = Some(attr.value.clone());
|
||||||
|
}
|
||||||
|
b"data-mx-color" => data.color = Some(attr.value.clone()),
|
||||||
|
b"data-mx-spoiler" => {
|
||||||
|
data.spoiler = Some(attr.value.clone());
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(data, remaining_attrs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The supported data of a `<img>` HTML element.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct ImageData {
|
||||||
|
/// The intrinsic width of the image, in pixels.
|
||||||
|
///
|
||||||
|
/// If parsing the integer from a string fails, the attribute will be in the `attrs` list of
|
||||||
|
/// [`MatrixElementData`].
|
||||||
|
pub width: Option<i64>,
|
||||||
|
|
||||||
|
/// The intrinsic height of the image, in pixels.
|
||||||
|
///
|
||||||
|
/// If parsing the integer from a string fails, the attribute will be in the `attrs` list of
|
||||||
|
/// [`MatrixElementData`].
|
||||||
|
pub height: Option<i64>,
|
||||||
|
|
||||||
|
/// Text that can replace the image.
|
||||||
|
pub alt: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// Text representing advisory information about the image.
|
||||||
|
pub title: Option<StrTendril>,
|
||||||
|
|
||||||
|
/// The image URL.
|
||||||
|
///
|
||||||
|
/// If this is not a valid `mxc:` URI, the attribute will be in the `attrs` list of
|
||||||
|
/// [`MatrixElementData`].
|
||||||
|
pub src: Option<OwnedMxcUri>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImageData {
|
||||||
|
/// Construct an empty `ImageData`.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { width: None, height: None, alt: None, title: None, src: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the given attributes to construct a new `ImageData`.
|
||||||
|
///
|
||||||
|
/// Returns a tuple containing the constructed data and the remaining unsupported attributes.
|
||||||
|
#[allow(clippy::mutable_key_type)]
|
||||||
|
fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
|
||||||
|
let mut data = Self::new();
|
||||||
|
let mut remaining_attrs = BTreeSet::new();
|
||||||
|
|
||||||
|
for attr in attrs {
|
||||||
|
if attr.name.ns != ns!() {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match attr.name.local.as_bytes() {
|
||||||
|
b"width" => {
|
||||||
|
if let Ok(width) = attr.value.parse() {
|
||||||
|
data.width = Some(width);
|
||||||
|
} else {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b"height" => {
|
||||||
|
if let Ok(height) = attr.value.parse() {
|
||||||
|
data.height = Some(height);
|
||||||
|
} else {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b"alt" => data.alt = Some(attr.value.clone()),
|
||||||
|
b"title" => data.title = Some(attr.value.clone()),
|
||||||
|
b"src" => {
|
||||||
|
let uri = <&MxcUri>::from(attr.value.as_ref());
|
||||||
|
if uri.validate().is_ok() {
|
||||||
|
data.src = Some(uri.to_owned());
|
||||||
|
} else {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
remaining_attrs.insert(attr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(data, remaining_attrs)
|
||||||
|
}
|
||||||
|
}
|
@ -6,12 +6,44 @@
|
|||||||
//! the Matrix protocol. It should be able to be used to interact with any HTML
|
//! the Matrix protocol. It should be able to be used to interact with any HTML
|
||||||
//! document but will offer APIs focused on specificities of HTML in the Matrix
|
//! document but will offer APIs focused on specificities of HTML in the Matrix
|
||||||
//! specification.
|
//! specification.
|
||||||
|
//!
|
||||||
|
//! # Features
|
||||||
|
//!
|
||||||
|
//! * `matrix` - Allows converting HTML element data into enums with variants for elements and
|
||||||
|
//! attributes [suggested by the Matrix Specification][spec].
|
||||||
|
//!
|
||||||
|
//! [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
|
||||||
#![warn(missing_docs)]
|
#![warn(missing_docs)]
|
||||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
|
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
|
||||||
|
|
||||||
|
pub use html5ever::{tendril::StrTendril, Attribute, LocalName, Namespace, Prefix, QualName};
|
||||||
|
|
||||||
mod helpers;
|
mod helpers;
|
||||||
mod html;
|
mod html;
|
||||||
mod sanitizer_config;
|
mod sanitizer_config;
|
||||||
|
|
||||||
pub use self::{helpers::*, html::*, sanitizer_config::SanitizerConfig};
|
pub use self::{helpers::*, html::*, sanitizer_config::*};
|
||||||
|
|
||||||
|
/// What [HTML elements and attributes] should be kept by the sanitizer.
|
||||||
|
///
|
||||||
|
/// [HTML elements and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
#[allow(clippy::exhaustive_enums)]
|
||||||
|
pub enum HtmlSanitizerMode {
|
||||||
|
/// Keep only the elements and attributes suggested in the Matrix specification.
|
||||||
|
///
|
||||||
|
/// In addition to filtering elements and attributes listed in the Matrix specification, it
|
||||||
|
/// also removes elements that are nested more than 100 levels deep.
|
||||||
|
///
|
||||||
|
/// Deprecated elements and attributes are also replaced when applicable.
|
||||||
|
Strict,
|
||||||
|
|
||||||||
|
/// Like [`Self::Strict`] mode, with additional elements and attributes that are not yet included in
|
||||||
|
/// the spec, but are reasonable to keep.
|
||||||
|
///
|
||||||
|
/// Differences with [`Self::Strict`] mode:
|
||||||
|
///
|
||||||
|
/// * The `matrix` scheme is allowed in links.
|
||||||
|
Compat,
|
||||||
|
}
|
||||||
|
@ -1,375 +1,498 @@
|
|||||||
use html5ever::{tendril::StrTendril, Attribute, LocalName};
|
#![allow(clippy::disallowed_types)]
|
||||||
use phf::{phf_map, phf_set, Map, Set};
|
|
||||||
use wildmatch::WildMatch;
|
|
||||||
|
|
||||||
use crate::html::{ElementData, Html, NodeData};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
/// Configuration to sanitize HTML tags and attributes.
|
pub(crate) mod clean;
|
||||||
|
|
||||||
|
use crate::HtmlSanitizerMode;
|
||||||
|
|
||||||
|
/// Configuration to sanitize HTML elements and attributes.
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Default, Clone)]
|
||||||
pub struct SanitizerConfig {
|
pub struct SanitizerConfig {
|
||||||
/// The allowed HTML tags.
|
/// The mode of the sanitizer, if any.
|
||||||
|
mode: Option<HtmlSanitizerMode>,
|
||||||
|
|
||||||
|
/// Change to the list of elements to replace.
|
||||||
///
|
///
|
||||||
/// If this is `None`, all tags are allowed.
|
/// The content is a map of element name to their replacement's element name.
|
||||||
allowed_tags: Option<&'static Set<&'static str>>,
|
replace_elements: Option<List<HashMap<&'static str, &'static str>>>,
|
||||||
|
|
||||||
/// The allowed deprecated HTML tags.
|
/// Elements to remove.
|
||||||
///
|
remove_elements: Option<HashSet<&'static str>>,
|
||||||
/// This is a map of allowed deprecated tag to their replacement tag.
|
|
||||||
deprecated_tags: Option<&'static Map<&'static str, &'static str>>,
|
|
||||||
|
|
||||||
/// The allowed attributes per tag.
|
/// Whether to remove the rich reply fallback.
|
||||||
///
|
|
||||||
/// If this is `None`, all attributes are allowed.
|
|
||||||
allowed_attrs: Option<&'static Map<&'static str, &'static Set<&'static str>>>,
|
|
||||||
|
|
||||||
/// The allowed deprecated attributes per tag.
|
|
||||||
///
|
|
||||||
/// This is a map of tag to a map of allowed deprecated attribute to their replacement
|
|
||||||
/// attribute.
|
|
||||||
deprecated_attrs: Option<&'static Map<&'static str, &'static Map<&'static str, &'static str>>>,
|
|
||||||
|
|
||||||
/// The allowed URI schemes per tag.
|
|
||||||
///
|
|
||||||
/// If this is `None`, all schemes are allowed.
|
|
||||||
allowed_schemes: Option<&'static Map<&'static str, &'static Set<&'static str>>>,
|
|
||||||
|
|
||||||
/// The allowed classes per tag.
|
|
||||||
///
|
|
||||||
/// If this is `None`, all classes are allowed.
|
|
||||||
allowed_classes: Option<&'static Map<&'static str, &'static Set<&'static str>>>,
|
|
||||||
|
|
||||||
/// The maximum nesting level of the tags.
|
|
||||||
max_depth: Option<u32>,
|
|
||||||
|
|
||||||
/// Whether to remove rich reply fallback.
|
|
||||||
remove_reply_fallback: bool,
|
remove_reply_fallback: bool,
|
||||||
|
|
||||||
|
/// Elements to ignore.
|
||||||
|
ignore_elements: Option<HashSet<&'static str>>,
|
||||||
|
|
||||||
|
/// Change to the list of elements to allow.
|
||||||
|
allow_elements: Option<List<HashSet<&'static str>>>,
|
||||||
|
|
||||||
|
/// Change to the list of attributes to replace per element.
|
||||||
|
///
|
||||||
|
/// The content is a map of element name to a map of attribute name to their replacement's
|
||||||
|
/// attribute name.
|
||||||
|
replace_attrs: Option<List<HashMap<&'static str, HashMap<&'static str, &'static str>>>>,
|
||||||
|
|
||||||
|
/// Removed attributes per element.
|
||||||
|
remove_attrs: Option<HashMap<&'static str, HashSet<&'static str>>>,
|
||||||
|
|
||||||
|
/// Change to the list of allowed attributes per element.
|
||||||
|
allow_attrs: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
|
||||||
|
|
||||||
|
/// Denied URI schemes per attribute per element.
|
||||||
|
///
|
||||||
|
/// The content is a map of element name to a map of attribute name to a set of schemes.
|
||||||
|
deny_schemes: Option<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>,
|
||||||
|
|
||||||
|
/// Change to the list of allowed URI schemes per attribute per element.
|
||||||
|
///
|
||||||
|
/// The content is a map of element name to a map of attribute name to a set of schemes.
|
||||||
|
#[allow(clippy::type_complexity)]
|
||||||
|
allow_schemes:
|
||||||
|
Option<List<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>>,
|
||||||
|
|
||||||
|
/// Removed classes per element.
|
||||||
|
///
|
||||||
|
/// The content is a map of element name to a set of classes.
|
||||||
|
remove_classes: Option<HashMap<&'static str, HashSet<&'static str>>>,
|
||||||
|
|
||||||
|
/// Change to the list of allowed classes per element.
|
||||||
|
///
|
||||||
|
/// The content is a map of element name to a set of classes.
|
||||||
|
allow_classes: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
|
||||||
|
|
||||||
|
/// Maximum nesting level of the elements.
|
||||||
|
max_depth: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SanitizerConfig {
|
impl SanitizerConfig {
|
||||||
/// Constructs an empty `SanitizerConfig` that will not filter any tag or attribute.
|
/// Constructs an empty `SanitizerConfig` that will not filter any element or attribute.
|
||||||
|
///
|
||||||
|
/// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
|
||||||
|
/// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
|
||||||
|
/// [`Self::remove_reply_fallback()`].
|
||||||
|
///
|
||||||
|
/// The list of allowed and replaced attributes can be changed with
|
||||||
|
/// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
|
||||||
|
/// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
|
||||||
|
/// [`Self::allow_classes()`], [`Self::remove_classes()`].
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Constructs a `SanitizerConfig` that will filter tags or attributes not [listed in the
|
/// Constructs a `SanitizerConfig` with the given mode for filtering elements and attributes.
|
||||||
/// Matrix specification].
|
|
||||||
///
|
///
|
||||||
/// Deprecated tags will be replaced with their non-deprecated equivalent.
|
/// The mode defines the basic list of allowed and replaced elements and attributes and the
|
||||||
|
/// maximum nesting level of elements.
|
||||||
///
|
///
|
||||||
/// It will not remove the reply fallback by default.
|
/// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
|
||||||
|
/// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
|
||||||
|
/// [`Self::remove_reply_fallback()`].
|
||||||
///
|
///
|
||||||
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
/// The list of allowed and replaced attributes can be changed with
|
||||||
pub fn strict() -> Self {
|
/// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
|
||||||
Self {
|
/// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
|
||||||
allowed_tags: Some(&ALLOWED_TAGS_WITHOUT_REPLY_STRICT),
|
/// [`Self::allow_classes()`], [`Self::remove_classes()`].
|
||||||
deprecated_tags: Some(&DEPRECATED_TAGS),
|
pub fn with_mode(mode: HtmlSanitizerMode) -> Self {
|
||||||
allowed_attrs: Some(&ALLOWED_ATTRIBUTES_STRICT),
|
Self { mode: Some(mode), ..Default::default() }
|
||||||
deprecated_attrs: Some(&DEPRECATED_ATTRS),
|
|
||||||
allowed_schemes: Some(&ALLOWED_SCHEMES_STRICT),
|
|
||||||
allowed_classes: Some(&ALLOWED_CLASSES_STRICT),
|
|
||||||
max_depth: Some(MAX_DEPTH_STRICT),
|
|
||||||
remove_reply_fallback: false,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Constructs a `SanitizerConfig` that will filter tags or attributes not [listed in the
|
/// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
|
||||||
/// Matrix specification], except a few for improved compatibility:
|
/// the Matrix specification].
|
||||||
///
|
///
|
||||||
/// - The `matrix` scheme is allowed in links.
|
/// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
|
||||||
|
/// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
|
||||||
|
/// [`Self::remove_reply_fallback()`].
|
||||||
///
|
///
|
||||||
/// Deprecated tags will be replaced with their non-deprecated equivalent.
|
/// The list of allowed and replaced attributes can be changed with
|
||||||
|
/// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
|
||||||
|
/// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
|
||||||
|
/// [`Self::allow_classes()`], [`Self::remove_classes()`].
|
||||||
///
|
///
|
||||||
/// It will not remove the reply fallback by default.
|
/// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Strict)`.
|
||||||
|
///
|
||||||
|
/// [suggested in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
|
pub fn strict() -> Self {
|
||||||
|
Self::with_mode(HtmlSanitizerMode::Strict)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
|
||||||
|
/// the Matrix specification], except a few for improved compatibility:
|
||||||
|
///
|
||||||
|
/// * The `matrix` scheme is allowed in links.
|
||||||
|
///
|
||||||
|
/// The list of allowed elements can be changed with [`Self::allow_elements()`],
|
||||||
|
/// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
|
||||||
|
/// [`Self::remove_reply_fallback()`].
|
||||||
|
///
|
||||||
|
/// The list of allowed attributes can be changed with [`Self::allow_attributes()`],
|
||||||
|
/// [`Self::replace_attributes()`], [`Self::remove_attributes()`], [`Self::allow_schemes()`],
|
||||||
|
/// [`Self::deny_schemes()`], [`Self::allow_classes()`], [`Self::remove_classes()`].
|
||||||
|
///
|
||||||
|
/// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Compat)`.
|
||||||
///
|
///
|
||||||
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
/// [suggested in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
pub fn compat() -> Self {
|
pub fn compat() -> Self {
|
||||||
Self { allowed_schemes: Some(&ALLOWED_SCHEMES_COMPAT), ..Self::strict() }
|
Self::with_mode(HtmlSanitizerMode::Compat)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change the list of replaced HTML elements.
|
||||||
|
///
|
||||||
|
/// The given list is added to or replaces the list of replacements of the current mode,
|
||||||
|
/// depending on the [`ListBehavior`].
|
||||||
|
///
|
||||||
|
/// The replacement occurs before the removal, so the replaced element should not be in
|
||||||
|
/// the allowed list of elements, but the replacement element should.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `elements`: The list of element names replacements.
|
||||||
|
pub fn replace_elements(
|
||||||
|
mut self,
|
||||||
|
elements: impl IntoIterator<Item = NameReplacement>,
|
||||||
|
behavior: ListBehavior,
|
||||||
|
) -> Self {
|
||||||
|
let content = elements.into_iter().map(|r| r.to_tuple()).collect();
|
||||||
|
self.replace_elements = Some(List { content, behavior });
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove the given HTML elements.
|
||||||
|
///
|
||||||
|
/// When an element is removed, the element and its children are dropped. If you want to remove
|
||||||
|
/// an element but keep its children, use [`SanitizerConfig::ignore_elements`] or
|
||||||
|
/// [`SanitizerConfig::allow_elements`].
|
||||||
|
///
|
||||||
|
/// Removing elements has a higher priority than ignoring or allowing. So if an element is in
|
||||||
|
/// this list, it will always be removed.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `elements`: The list of element names to remove.
|
||||||
|
pub fn remove_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
|
||||||
|
self.remove_elements = Some(elements.into_iter().collect());
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove the [rich reply fallback].
|
/// Remove the [rich reply fallback].
|
||||||
///
|
///
|
||||||
|
/// Calling this allows to remove the `mx-reply` element in addition to the list of elements to
|
||||||
|
/// remove.
|
||||||
|
///
|
||||||
|
/// Removing elements has a higher priority than ignoring or allowing. So if this setting is
|
||||||
|
/// set, `mx-reply` will always be removed.
|
||||||
|
///
|
||||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||||
pub fn remove_reply_fallback(mut self) -> Self {
|
pub fn remove_reply_fallback(mut self) -> Self {
|
||||||
self.remove_reply_fallback = true;
|
self.remove_reply_fallback = true;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clean the given HTML with this sanitizer.
|
/// Ignore the given HTML elements.
|
||||||
pub(crate) fn clean(self, html: &mut Html) {
|
///
|
||||||
let root = html.root();
|
/// When an element is ignored, the element is dropped and replaced by its children. If you want
|
||||||
let mut next_child = root.first_child;
|
/// to drop an element and its children, use [`SanitizerConfig::remove_elements`].
|
||||||
|
///
|
||||||
while let Some(child) = next_child {
|
/// Ignoring elements has a lower priority than removing but a higher priority than allowing.
|
||||||
next_child = html.nodes[child].next_sibling;
|
///
|
||||||
self.clean_node(html, child, 0);
|
/// # Parameters
|
||||||
}
|
///
|
||||||
|
/// * `elements`: The list of element names to ignore.
|
||||||
|
pub fn ignore_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
|
||||||
|
self.ignore_elements = Some(elements.into_iter().collect());
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clean_node(&self, html: &mut Html, node_id: usize, depth: u32) {
|
/// Change the list of allowed HTML elements.
|
||||||
self.apply_deprecations(html, node_id);
|
///
|
||||||
|
/// The given list is added to or replaces the list of allowed elements of the current
|
||||||
let action = self.node_action(html, node_id, depth);
|
/// mode, depending on the [`ListBehavior`].
|
||||||
|
///
|
||||||
if action != NodeAction::Remove {
|
/// If an element is not allowed, it is ignored. If no mode is set and no elements are
|
||||||
let mut next_child = html.nodes[node_id].first_child;
|
/// explicitly allowed, all elements are allowed.
|
||||||
while let Some(child) = next_child {
|
///
|
||||||
next_child = html.nodes[child].next_sibling;
|
/// # Parameters
|
||||||
|
///
|
||||||
if action == NodeAction::Ignore {
|
/// * `elements`: The list of element names.
|
||||||
html.insert_before(node_id, child);
|
pub fn allow_elements(
|
||||||
|
mut self,
|
||||||
|
elements: impl IntoIterator<Item = &'static str>,
|
||||||
|
behavior: ListBehavior,
|
||||||
|
) -> Self {
|
||||||
|
let content = elements.into_iter().collect();
|
||||||
|
self.allow_elements = Some(List { content, behavior });
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
self.clean_node(html, child, depth + 1);
|
/// Change the list of replaced attributes per HTML element.
|
||||||
}
|
///
|
||||||
}
|
/// The given list is added to or replaces the list of replacements of the current mode,
|
||||||
|
/// depending on the [`ListBehavior`].
|
||||||
if matches!(action, NodeAction::Ignore | NodeAction::Remove) {
|
///
|
||||||
html.detach(node_id);
|
/// The replacement occurs before the removal, so the replaced attribute should not be in the
|
||||||
} else if let Some(data) = html.nodes[node_id].as_element_mut() {
|
/// list of allowed attributes, but the replacement attribute should. Attribute replacement
|
||||||
self.clean_element_attributes(data);
|
/// occurs before element replacement, so if you want to replace an attribute on an element
|
||||||
}
|
/// that is set to be replaced, you must use the replaced element's name, not the name of its
|
||||||
}
|
|
||||||
|
|
||||||
fn apply_deprecations(&self, html: &mut Html, node_id: usize) {
|
|
||||||
if let NodeData::Element(ElementData { name, attrs, .. }) = &mut html.nodes[node_id].data {
|
|
||||||
let tag: &str = &name.local;
|
|
||||||
|
|
||||||
if let Some(deprecated_attrs) =
|
|
||||||
self.deprecated_attrs.and_then(|deprecated_attrs| deprecated_attrs.get(tag))
|
|
||||||
{
|
|
||||||
*attrs = attrs
|
|
||||||
.clone()
|
|
||||||
.into_iter()
|
|
||||||
.map(|mut attr| {
|
|
||||||
let attr_name: &str = &attr.name.local;
|
|
||||||
|
|
||||||
let attr_replacement =
|
|
||||||
deprecated_attrs.get(attr_name).map(|s| LocalName::from(*s));
|
|
||||||
|
|
||||||
if let Some(attr_replacement) = attr_replacement {
|
|
||||||
attr.name.local = attr_replacement;
|
|
||||||
}
|
|
||||||
|
|
||||||
attr
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
}
|
|
||||||
|
|
||||||
let tag_replacement = self
|
|
||||||
.deprecated_tags
|
|
||||||
.and_then(|deprecated_tags| deprecated_tags.get(tag))
|
|
||||||
.map(|s| LocalName::from(*s));
|
|
||||||
|
|
||||||
if let Some(tag_replacement) = tag_replacement {
|
|
||||||
name.local = tag_replacement;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn node_action(&self, html: &Html, node_id: usize, depth: u32) -> NodeAction {
|
|
||||||
match &html.nodes[node_id].data {
|
|
||||||
NodeData::Element(ElementData { name, attrs, .. }) => {
|
|
||||||
let tag: &str = &name.local;
|
|
||||||
|
|
||||||
if (self.remove_reply_fallback && tag == RICH_REPLY_TAG)
|
|
||||||
|| self.max_depth.is_some_and(|max| depth >= max)
|
|
||||||
{
|
|
||||||
NodeAction::Remove
|
|
||||||
} else if self
|
|
||||||
.allowed_tags
|
|
||||||
.is_some_and(|allowed| tag != RICH_REPLY_TAG && !allowed.contains(tag))
|
|
||||||
{
|
|
||||||
NodeAction::Ignore
|
|
||||||
} else if let Some(allowed_schemes) = self.allowed_schemes {
|
|
||||||
for attr in attrs.iter() {
|
|
||||||
let value = &attr.value;
|
|
||||||
let attr: &str = &attr.name.local;
|
|
||||||
|
|
||||||
// Check if there is a (tag, attr) tuple entry.
|
|
||||||
if let Some(schemes) = allowed_schemes.get(&*format!("{tag}:{attr}")) {
|
|
||||||
// Check if the scheme is allowed.
|
|
||||||
if !schemes
|
|
||||||
.iter()
|
|
||||||
.any(|scheme| value.starts_with(&format!("{scheme}:")))
|
|
||||||
{
|
|
||||||
return NodeAction::Ignore;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NodeAction::None
|
|
||||||
} else {
|
|
||||||
NodeAction::None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NodeData::Text(_) => NodeAction::None,
|
|
||||||
_ => NodeAction::Remove,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clean_element_attributes(&self, data: &mut ElementData) {
|
|
||||||
let ElementData { name, attrs } = data;
|
|
||||||
let tag: &str = &name.local;
|
|
||||||
|
|
||||||
let actions: Vec<_> = attrs
|
|
||||||
.iter()
|
|
||||||
.filter_map(|attr| {
|
|
||||||
let value = &attr.value;
|
|
||||||
let name: &str = &attr.name.local;
|
|
||||||
|
|
||||||
if self
|
|
||||||
.allowed_attrs
|
|
||||||
.is_some_and(|m| !m.get(tag).is_some_and(|attrs| attrs.contains(name)))
|
|
||||||
{
|
|
||||||
return Some(AttributeAction::Remove(attr.to_owned()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if name == "class" {
|
|
||||||
if let Some(classes) = self.allowed_classes.and_then(|m| m.get(tag)) {
|
|
||||||
let mut changed = false;
|
|
||||||
let attr_classes = value.split_whitespace().filter(|attr_class| {
|
|
||||||
for class in classes.iter() {
|
|
||||||
if WildMatch::new(class).matches(attr_class) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
changed = true;
|
|
||||||
false
|
|
||||||
});
|
|
||||||
|
|
||||||
let folded_classes = attr_classes.fold(String::new(), |mut a, b| {
|
|
||||||
a.reserve(b.len() + 1);
|
|
||||||
a.push_str(b);
|
|
||||||
a.push('\n');
|
|
||||||
a
|
|
||||||
});
|
|
||||||
let final_classes = folded_classes.trim_end();
|
|
||||||
|
|
||||||
if changed {
|
|
||||||
if final_classes.is_empty() {
|
|
||||||
return Some(AttributeAction::Remove(attr.to_owned()));
|
|
||||||
} else {
|
|
||||||
return Some(AttributeAction::ReplaceValue(
|
|
||||||
attr.to_owned(),
|
|
||||||
final_classes.to_owned().into(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for action in actions {
|
|
||||||
match action {
|
|
||||||
AttributeAction::ReplaceValue(attr, value) => {
|
|
||||||
if let Some(mut attr) = attrs.take(&attr) {
|
|
||||||
attr.value = value;
|
|
||||||
attrs.insert(attr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AttributeAction::Remove(attr) => {
|
|
||||||
attrs.remove(&attr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The possible actions to apply to an element node.
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
enum NodeAction {
|
|
||||||
/// Don't do anything.
|
|
||||||
None,
|
|
||||||
|
|
||||||
/// Remove the element but keep its children.
|
|
||||||
Ignore,
|
|
||||||
|
|
||||||
/// Remove the element and its children.
|
|
||||||
Remove,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The possible actions to apply to an attribute.
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum AttributeAction {
|
|
||||||
/// Replace the value of the attribute.
|
|
||||||
ReplaceValue(Attribute, StrTendril),
|
|
||||||
|
|
||||||
/// Remove the attribute.
|
|
||||||
Remove(Attribute),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// List of HTML tags allowed in the Matrix specification, without the rich reply fallback tag.
|
|
||||||
static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
|
|
||||||
"del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
|
|
||||||
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "s",
|
|
||||||
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
|
|
||||||
"caption", "pre", "span", "img", "details", "summary",
|
|
||||||
};
|
|
||||||
|
|
||||||
/// The HTML tag name for a rich reply fallback.
|
|
||||||
const RICH_REPLY_TAG: &str = "mx-reply";
|
|
||||||
|
|
||||||
/// HTML tags that were allowed in the Matrix specification, with their replacement.
|
|
||||||
static DEPRECATED_TAGS: Map<&str, &str> = phf_map! {
|
|
||||||
"font" => "span",
|
|
||||||
"strike" => "s",
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Allowed attributes per HTML tag according to the Matrix specification.
|
|
||||||
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
|
||||||
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
|
|
||||||
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
|
|
||||||
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
|
|
||||||
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
|
|
||||||
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
|
|
||||||
};
|
|
||||||
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
|
|
||||||
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
|
|
||||||
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
|
|
||||||
static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
|
|
||||||
phf_set! { "width", "height", "alt", "title", "src" };
|
|
||||||
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
|
|
||||||
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
|
|
||||||
|
|
||||||
/// Attributes that were allowed on HTML tags according to the Matrix specification, with their
|
|
||||||
/// replacement.
|
/// replacement.
|
||||||
static DEPRECATED_ATTRS: Map<&str, &Map<&str, &str>> = phf_map! {
|
|
||||||
"font" => &DEPRECATED_ATTRIBUTES_FONT,
|
|
||||||
};
|
|
||||||
static DEPRECATED_ATTRIBUTES_FONT: Map<&str, &str> = phf_map! { "color" => "data-mx-color" };
|
|
||||||
|
|
||||||
/// Allowed schemes of URIs per HTML tag and attribute tuple according to the Matrix specification.
|
|
||||||
static ALLOWED_SCHEMES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
|
||||||
"a:href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
|
|
||||||
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
|
|
||||||
};
|
|
||||||
static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
|
|
||||||
phf_set! { "http", "https", "ftp", "mailto", "magnet" };
|
|
||||||
static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
|
|
||||||
|
|
||||||
/// Extra allowed schemes of URIs per HTML tag and attribute tuple.
|
|
||||||
///
|
///
|
||||||
/// This is a convenience list to add schemes that can be encountered but are not listed in the
|
/// # Parameters
|
||||||
/// Matrix specification. It consists of:
|
|
||||||
///
|
///
|
||||||
/// * The `matrix` scheme for `a` tags (see [matrix-org/matrix-spec#1108]).
|
/// * `attrs`: The list of element's attributes replacements.
|
||||||
///
|
pub fn replace_attributes<'a>(
|
||||||
/// To get a complete list, add these to `ALLOWED_SCHEMES_STRICT`.
|
mut self,
|
||||||
///
|
attrs: impl IntoIterator<Item = ElementAttributesReplacement<'a>>,
|
||||||
/// [matrix-org/matrix-spec#1108]: https://github.com/matrix-org/matrix-spec/issues/1108
|
behavior: ListBehavior,
|
||||||
static ALLOWED_SCHEMES_COMPAT: Map<&str, &Set<&str>> = phf_map! {
|
) -> Self {
|
||||||
"a:href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
|
let content = attrs.into_iter().map(|r| r.to_tuple()).collect();
|
||||||
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
|
self.replace_attrs = Some(List { content, behavior });
|
||||||
};
|
self
|
||||||
static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> =
|
}
|
||||||
phf_set! { "http", "https", "ftp", "mailto", "magnet", "matrix" };
|
|
||||||
|
|
||||||
/// Allowed classes per HTML tag according to the Matrix specification.
|
/// Remove the given attributes per HTML element.
|
||||||
static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
|
///
|
||||||
phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
|
/// Removing attributes has a higher priority than allowing. So if an attribute is in
|
||||||
static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
|
/// this list, it will always be removed.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
|
||||||
|
/// and `properties` contains attribute names.
|
||||||
|
pub fn remove_attributes<'a>(
|
||||||
|
mut self,
|
||||||
|
attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
|
||||||
|
) -> Self {
|
||||||
|
self.remove_attrs = Some(attrs.into_iter().map(|a| a.to_tuple()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Max depth of nested HTML tags allowed by the Matrix specification.
|
/// Change the list of allowed attributes per HTML element.
|
||||||
const MAX_DEPTH_STRICT: u32 = 100;
|
///
|
||||||
|
/// The given list is added to or replaces the list of allowed attributes of the current
|
||||||
|
/// mode, depending on the [`ListBehavior`].
|
||||||
|
///
|
||||||
|
/// If an attribute is not allowed, it is removed. If no mode is set and no attributes are
|
||||||
|
/// explicitly allowed, all attributes are allowed.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
|
||||||
|
/// and `properties` contains attribute names.
|
||||||
|
pub fn allow_attributes<'a>(
|
||||||
|
mut self,
|
||||||
|
attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
|
||||||
|
behavior: ListBehavior,
|
||||||
|
) -> Self {
|
||||||
|
let content = attrs.into_iter().map(|a| a.to_tuple()).collect();
|
||||||
|
self.allow_attrs = Some(List { content, behavior });
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deny the given URI schemes per attribute per HTML element.
|
||||||
|
///
|
||||||
|
/// Denying schemes has a higher priority than allowing. So if a scheme is in
|
||||||
|
/// this list, it will always be denied.
|
||||||
|
///
|
||||||
|
/// If a scheme is denied, its element is removed, because it is deemed that the element will
|
||||||
|
/// not be usable without it URI.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `schemes`: The list of schemes per attribute per element.
|
||||||
|
pub fn deny_schemes<'a>(
|
||||||
|
mut self,
|
||||||
|
schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
|
||||||
|
) -> Self {
|
||||||
|
self.deny_schemes = Some(schemes.into_iter().map(|s| s.to_tuple()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change the list of allowed schemes per attribute per HTML element.
|
||||||
|
///
|
||||||
|
/// The given list is added to or replaces the list of allowed schemes of the current
|
||||||
|
/// mode, depending on the [`ListBehavior`].
|
||||||
|
///
|
||||||
|
/// If a scheme is not allowed, it is denied. If a scheme is denied, its element is ignored,
|
||||||
|
/// because it is deemed that the element will not be usable without it URI. If no mode is set
|
||||||
|
/// and no schemes are explicitly allowed, all schemes are allowed.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `schemes`: The list of schemes per attribute per element.
|
||||||
|
pub fn allow_schemes<'a>(
|
||||||
|
mut self,
|
||||||
|
schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
|
||||||
|
behavior: ListBehavior,
|
||||||
|
) -> Self {
|
||||||
|
let content = schemes.into_iter().map(|s| s.to_tuple()).collect();
|
||||||
|
self.allow_schemes = Some(List { content, behavior });
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deny the given classes per HTML element.
|
||||||
|
///
|
||||||
|
/// Removing classes has a higher priority than allowing. So if a class is in
|
||||||
|
/// this list, it will always be removed.
|
||||||
|
///
|
||||||
|
/// If all the classes of a `class` attribute are removed, the whole attribute is removed.
|
||||||
|
///
|
||||||
|
/// In the list of classes, the names must match the full class name. `*` can be used as a
|
||||||
|
/// wildcard for any number of characters. So `language` will only match a class named
|
||||||
|
/// `language`, and `language-*` will match any class name starting with `language-`.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
|
||||||
|
/// `properties` contains classes.
|
||||||
|
pub fn remove_classes<'a>(
|
||||||
|
mut self,
|
||||||
|
classes: impl IntoIterator<Item = PropertiesNames<'a>>,
|
||||||
|
) -> Self {
|
||||||
|
self.remove_classes = Some(classes.into_iter().map(|c| c.to_tuple()).collect());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change the list of allowed classes per HTML element.
|
||||||
|
///
|
||||||
|
/// The given list is added, removed or replaces the list of allowed classes of the current
|
||||||
|
/// mode, depending on the [`ListBehavior`].
|
||||||
|
///
|
||||||
|
/// If a class is not allowed, it is removed. If all the classes of a `class` attribute are
|
||||||
|
/// removed, the whole attribute is removed. If no mode is set and no classes are explicitly
|
||||||
|
/// allowed, all classes are allowed.
|
||||||
|
///
|
||||||
|
/// In the list of classes, the names must match the full class name. `*` can be used as a
|
||||||
|
/// wildcard for any number of characters. So `language` will only match a class named
|
||||||
|
/// `language`, and `language-*` will match any class name starting with `language-`.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
|
||||||
|
/// `properties` contains classes.
|
||||||
|
pub fn allow_classes<'a>(
|
||||||
|
mut self,
|
||||||
|
classes: impl IntoIterator<Item = PropertiesNames<'a>>,
|
||||||
|
behavior: ListBehavior,
|
||||||
|
) -> Self {
|
||||||
|
let content = classes.into_iter().map(|c| c.to_tuple()).collect();
|
||||||
|
self.allow_classes = Some(List { content, behavior });
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The maximum nesting level of HTML elements.
|
||||||
|
///
|
||||||
|
/// This overrides the maximum depth set by the mode, if one is set.
|
||||||
|
///
|
||||||
|
/// All elements that are deeper than the maximum depth will be removed. If no mode is set and
|
||||||
|
/// no maximum depth is explicitly set, elements are not filtered by their nesting level.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// * `depth`: The maximum nesting level allowed.
|
||||||
|
pub fn max_depth(mut self, depth: u32) -> Self {
|
||||||
|
self.max_depth = Some(depth);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A list with a behavior.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct List<T> {
|
||||||
|
/// The content of this list.
|
||||||
|
content: T,
|
||||||
|
|
||||||
|
/// The behavior of this list.
|
||||||
|
behavior: ListBehavior,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> List<T> {
|
||||||
|
/// Whether this is `ListBehavior::Override`.
|
||||||
|
fn is_override(&self) -> bool {
|
||||||
|
self.behavior == ListBehavior::Override
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How a list given to the sanitizer config combines with the default list of the mode.
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListBehavior {
    /// The list replaces the default list of the current mode, if one is set.
    ///
    /// If no mode is set, this is the full allow list.
    Override,

    /// The list is added to the default list of the current mode, if one is set.
    ///
    /// If no mode is set, this is the full allow list.
    Add,
}
|
||||||
|
|
||||||
|
/// The replacement of a name by another one.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy)]
pub struct NameReplacement {
    /// The name to replace.
    pub old: &'static str,

    /// The name of the replacement.
    pub new: &'static str,
}

impl NameReplacement {
    /// Convert this replacement into an `(old, new)` pair.
    fn to_tuple(self) -> (&'static str, &'static str) {
        let Self { old, new } = self;
        (old, new)
    }
}
|
||||||
|
|
||||||
|
/// A list of properties names attached to a parent.
#[derive(Debug, Clone, Copy)]
#[allow(clippy::exhaustive_structs)]
pub struct PropertiesNames<'a> {
    /// The name of the parent.
    pub parent: &'static str,

    /// The list of properties names.
    pub properties: &'a [&'static str],
}

impl<'a> PropertiesNames<'a> {
    /// Convert this into a `(parent, properties)` pair, with the properties gathered in a set
    /// (so duplicate names collapse into one entry).
    fn to_tuple(self) -> (&'static str, HashSet<&'static str>) {
        let Self { parent, properties } = self;
        (parent, properties.iter().copied().collect())
    }
}
|
||||||
|
|
||||||
|
/// The replacement of an element's attributes.
|
||||||
|
#[allow(clippy::exhaustive_structs)]
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct ElementAttributesReplacement<'a> {
|
||||||
|
/// The name of the element.
|
||||||
|
pub element: &'static str,
|
||||||
|
/// The list of attributes replacements.
|
||||||
|
pub replacements: &'a [NameReplacement],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ElementAttributesReplacement<'a> {
|
||||||
|
fn to_tuple(self) -> (&'static str, HashMap<&'static str, &'static str>) {
|
||||||
|
let map = self.replacements.iter().map(|r| r.to_tuple()).collect();
|
||||||
|
(self.element, map)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An element's attributes' URI schemes.
|
||||||
|
#[allow(clippy::exhaustive_structs)]
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct ElementAttributesSchemes<'a> {
|
||||||
|
/// The name of the element.
|
||||||
|
pub element: &'static str,
|
||||||
|
/// The list of allowed URI schemes per attribute name.
|
||||||
|
///
|
||||||
|
/// The value of the `parent` is the attribute name and the properties are schemes.
|
||||||
|
pub attr_schemes: &'a [PropertiesNames<'a>],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ElementAttributesSchemes<'a> {
|
||||||
|
fn to_tuple(self) -> (&'static str, HashMap<&'static str, HashSet<&'static str>>) {
|
||||||
|
let map = self.attr_schemes.iter().map(|s| s.to_tuple()).collect();
|
||||||
|
(self.element, map)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
466
crates/ruma-html/src/sanitizer_config/clean.rs
Normal file
466
crates/ruma-html/src/sanitizer_config/clean.rs
Normal file
@ -0,0 +1,466 @@
|
|||||||
|
use html5ever::{tendril::StrTendril, Attribute, LocalName};
|
||||||
|
use phf::{phf_map, phf_set, Map, Set};
|
||||||
|
use wildmatch::WildMatch;
|
||||||
|
|
||||||
|
use crate::{ElementData, Html, HtmlSanitizerMode, NodeData, SanitizerConfig};
|
||||||
|
|
||||||
|
/// HTML elements allowed in the Matrix specification.
|
||||||
|
static ALLOWED_ELEMENTS_STRICT: Set<&str> = phf_set! {
|
||||||
|
"del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
|
||||||
|
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "s",
|
||||||
|
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
|
||||||
|
"caption", "pre", "span", "img", "details", "summary", "mx-reply",
|
||||||
|
};
|
||||||
|
|
||||||
|
/// The name of the HTML element that wraps a rich reply fallback.
const RICH_REPLY_ELEMENT_NAME: &str = "mx-reply";
|
||||||
|
|
||||||
|
/// HTML elements that were previously allowed in the Matrix specification, with their replacement.
|
||||||
|
static DEPRECATED_ELEMENTS: Map<&str, &str> = phf_map! {
|
||||||
|
"font" => "span",
|
||||||
|
"strike" => "s",
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Allowed attributes per HTML element according to the Matrix specification.
|
||||||
|
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
|
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
|
||||||
|
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
|
||||||
|
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
|
||||||
|
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
|
||||||
|
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
|
||||||
|
};
|
||||||
|
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
|
||||||
|
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
|
||||||
|
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
|
||||||
|
static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
|
||||||
|
phf_set! { "width", "height", "alt", "title", "src" };
|
||||||
|
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
|
||||||
|
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
|
||||||
|
|
||||||
|
/// Attributes that were previously allowed on HTML elements according to the Matrix specification,
|
||||||
|
/// with their replacement.
|
||||||
|
static DEPRECATED_ATTRS: Map<&str, &Map<&str, &str>> = phf_map! {
|
||||||
|
"font" => &DEPRECATED_ATTRIBUTES_FONT,
|
||||||
|
};
|
||||||
|
static DEPRECATED_ATTRIBUTES_FONT: Map<&str, &str> = phf_map! { "color" => "data-mx-color" };
|
||||||
|
|
||||||
|
/// Allowed schemes of URIs per attribute per HTML element according to the Matrix specification.
|
||||||
|
static ALLOWED_SCHEMES_STRICT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
|
||||||
|
"a" => &ALLOWED_SCHEMES_A_STRICT,
|
||||||
|
"img" => &ALLOWED_SCHEMES_IMG_STRICT,
|
||||||
|
};
|
||||||
|
static ALLOWED_SCHEMES_A_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
|
"href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
|
||||||
|
};
|
||||||
|
pub(crate) static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
|
||||||
|
phf_set! { "http", "https", "ftp", "mailto", "magnet" };
|
||||||
|
static ALLOWED_SCHEMES_IMG_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
|
"src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
|
||||||
|
};
|
||||||
|
static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
|
||||||
|
|
||||||
|
/// Extra allowed schemes of URIs per attribute per HTML element.
|
||||||
|
///
|
||||||
|
/// This is a convenience list to add schemes that can be encountered but are not listed in the
|
||||||
|
/// Matrix specification. It consists of:
|
||||||
|
///
|
||||||
|
/// * The `matrix` scheme for `a` elements (see [matrix-org/matrix-spec#1108]).
|
||||||
|
///
|
||||||
|
/// To get a complete list, add these to `ALLOWED_SCHEMES_STRICT`.
|
||||||
|
///
|
||||||
|
/// [matrix-org/matrix-spec#1108]: https://github.com/matrix-org/matrix-spec/issues/1108
|
||||||
|
static ALLOWED_SCHEMES_COMPAT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
|
||||||
|
"a" => &ALLOWED_SCHEMES_A_COMPAT,
|
||||||
|
};
|
||||||
|
static ALLOWED_SCHEMES_A_COMPAT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
|
"href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
|
||||||
|
};
|
||||||
|
pub(crate) static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> = phf_set! { "matrix" };
|
||||||
|
|
||||||
|
/// Allowed classes per HTML element according to the Matrix specification.
|
||||||
|
static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
|
||||||
|
phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
|
||||||
|
static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
|
||||||
|
|
||||||
|
/// Maximum nesting depth of HTML elements allowed by the Matrix specification.
///
/// Used as the default maximum depth whenever a mode is set and no explicit depth is given.
const MAX_DEPTH_STRICT: u32 = 100;
|
||||||
|
|
||||||
|
impl SanitizerConfig {
|
||||||
|
/// Whether the current mode uses the values of the strict mode.
|
||||||
|
fn use_strict(&self) -> bool {
|
||||||
|
self.mode.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the current mode uses the values of the compat mode.
|
||||||
|
fn use_compat(&self) -> bool {
|
||||||
|
self.mode.is_some_and(|m| m == HtmlSanitizerMode::Compat)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The maximum nesting level allowed by the config.
|
||||||
|
fn max_depth_value(&self) -> Option<u32> {
|
||||||
|
self.max_depth.or_else(|| self.use_strict().then_some(MAX_DEPTH_STRICT))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clean the given HTML with this sanitizer.
|
||||||
|
pub(crate) fn clean(&self, html: &mut Html) {
|
||||||
|
let root = html.root();
|
||||||
|
let mut next_child = root.first_child;
|
||||||
|
|
||||||
|
while let Some(child) = next_child {
|
||||||
|
next_child = html.nodes[child].next_sibling;
|
||||||
|
self.clean_node(html, child, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clean_node(&self, html: &mut Html, node_id: usize, depth: u32) {
|
||||||
|
self.apply_replacements(html, node_id);
|
||||||
|
|
||||||
|
let action = self.node_action(html, node_id, depth);
|
||||||
|
|
||||||
|
if action != NodeAction::Remove {
|
||||||
|
let mut next_child = html.nodes[node_id].first_child;
|
||||||
|
while let Some(child) = next_child {
|
||||||
|
next_child = html.nodes[child].next_sibling;
|
||||||
|
|
||||||
|
if action == NodeAction::Ignore {
|
||||||
|
html.insert_before(node_id, child);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.clean_node(html, child, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(action, NodeAction::Ignore | NodeAction::Remove) {
|
||||||
|
html.detach(node_id);
|
||||||
|
} else if let Some(data) = html.nodes[node_id].as_element_mut() {
|
||||||
|
self.clean_element_attributes(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_replacements(&self, html: &mut Html, node_id: usize) {
|
||||||
|
if let NodeData::Element(ElementData { name, attrs, .. }) = &mut html.nodes[node_id].data {
|
||||||
|
let element_name = name.local.as_ref();
|
||||||
|
|
||||||
|
// Replace attributes.
|
||||||
|
let list_replacements =
|
||||||
|
self.replace_attrs.as_ref().and_then(|list| list.content.get(element_name));
|
||||||
|
let list_is_override =
|
||||||
|
self.replace_attrs.as_ref().map(|list| list.is_override()).unwrap_or_default();
|
||||||
|
let mode_replacements = (!list_is_override && self.use_strict())
|
||||||
|
.then(|| DEPRECATED_ATTRS.get(element_name))
|
||||||
|
.flatten();
|
||||||
|
|
||||||
|
if list_replacements.is_some() || mode_replacements.is_some() {
|
||||||
|
*attrs = attrs
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|mut attr| {
|
||||||
|
let attr_name = attr.name.local.as_ref();
|
||||||
|
|
||||||
|
let attr_replacement = list_replacements
|
||||||
|
.and_then(|s| s.get(attr_name))
|
||||||
|
.or_else(|| mode_replacements.and_then(|s| s.get(attr_name)))
|
||||||
|
.copied();
|
||||||
|
|
||||||
|
if let Some(attr_replacement) = attr_replacement {
|
||||||
|
attr.name.local = LocalName::from(attr_replacement);
|
||||||
|
}
|
||||||
|
|
||||||
|
attr
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace element.
|
||||||
|
let mut element_replacement = self
|
||||||
|
.replace_elements
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|list| list.content.get(element_name))
|
||||||
|
.copied();
|
||||||
|
|
||||||
|
if element_replacement.is_none() {
|
||||||
|
let list_is_override = self
|
||||||
|
.replace_elements
|
||||||
|
.as_ref()
|
||||||
|
.map(|list| list.is_override())
|
||||||
|
.unwrap_or_default();
|
||||||
|
element_replacement = (!list_is_override && self.use_strict())
|
||||||
|
.then(|| DEPRECATED_ELEMENTS.get(element_name))
|
||||||
|
.flatten()
|
||||||
|
.copied();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(element_replacement) = element_replacement {
|
||||||
|
name.local = LocalName::from(element_replacement);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn node_action(&self, html: &Html, node_id: usize, depth: u32) -> NodeAction {
|
||||||
|
match &html.nodes[node_id].data {
|
||||||
|
NodeData::Element(ElementData { name, attrs, .. }) => {
|
||||||
|
let element_name = name.local.as_ref();
|
||||||
|
|
||||||
|
// Check if element should be removed.
|
||||||
|
if self.remove_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
|
||||||
|
return NodeAction::Remove;
|
||||||
|
}
|
||||||
|
if self.remove_reply_fallback && element_name == RICH_REPLY_ELEMENT_NAME {
|
||||||
|
return NodeAction::Remove;
|
||||||
|
}
|
||||||
|
if self.max_depth_value().is_some_and(|max| depth >= max) {
|
||||||
|
return NodeAction::Remove;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if element should be ignored.
|
||||||
|
if self.ignore_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
|
||||||
|
return NodeAction::Ignore;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if element should be allowed.
|
||||||
|
if self.allow_elements.is_some() || self.use_strict() {
|
||||||
|
let list_allowed = self
|
||||||
|
.allow_elements
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|list| list.content.contains(element_name));
|
||||||
|
let list_is_override = self
|
||||||
|
.allow_elements
|
||||||
|
.as_ref()
|
||||||
|
.map(|list| list.is_override())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let mode_allowed = !list_is_override
|
||||||
|
&& self.use_strict()
|
||||||
|
&& ALLOWED_ELEMENTS_STRICT.contains(element_name);
|
||||||
|
|
||||||
|
if !list_allowed && !mode_allowed {
|
||||||
|
return NodeAction::Ignore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if element contains scheme that should be denied.
|
||||||
|
if let Some(deny_schemes) =
|
||||||
|
self.deny_schemes.as_ref().and_then(|map| map.get(element_name))
|
||||||
|
{
|
||||||
|
for attr in attrs.iter() {
|
||||||
|
let value = &attr.value;
|
||||||
|
let attr_name = attr.name.local.as_ref();
|
||||||
|
|
||||||
|
if let Some(schemes) = deny_schemes.get(attr_name) {
|
||||||
|
// Check if the scheme is denied.
|
||||||
|
if schemes.iter().any(|scheme| value.starts_with(&format!("{scheme}:")))
|
||||||
|
{
|
||||||
|
return NodeAction::Ignore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.allow_schemes.is_none() && !self.use_strict() {
|
||||||
|
// All schemes are allowed.
|
||||||
|
return NodeAction::None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if element contains scheme that should be allowed.
|
||||||
|
let list_element_schemes =
|
||||||
|
self.allow_schemes.as_ref().and_then(|list| list.content.get(element_name));
|
||||||
|
let list_is_override =
|
||||||
|
self.allow_schemes.as_ref().map(|list| list.is_override()).unwrap_or_default();
|
||||||
|
let strict_mode_element_schemes = (!list_is_override && self.use_strict())
|
||||||
|
.then(|| ALLOWED_SCHEMES_STRICT.get(element_name))
|
||||||
|
.flatten();
|
||||||
|
let compat_mode_element_schemes = (!list_is_override && self.use_compat())
|
||||||
|
.then(|| ALLOWED_SCHEMES_COMPAT.get(element_name))
|
||||||
|
.flatten();
|
||||||
|
|
||||||
|
if list_element_schemes.is_none()
|
||||||
|
&& strict_mode_element_schemes.is_none()
|
||||||
|
&& compat_mode_element_schemes.is_none()
|
||||||
|
{
|
||||||
|
// We don't check schemes for this element.
|
||||||
|
return NodeAction::None;
|
||||||
|
}
|
||||||
|
|
||||||
|
for attr in attrs.iter() {
|
||||||
|
let value = &attr.value;
|
||||||
|
let attr_name = attr.name.local.as_ref();
|
||||||
|
|
||||||
|
let list_attr_schemes = list_element_schemes.and_then(|map| map.get(attr_name));
|
||||||
|
let strict_mode_attr_schemes =
|
||||||
|
strict_mode_element_schemes.and_then(|map| map.get(attr_name));
|
||||||
|
let compat_mode_attr_schemes =
|
||||||
|
compat_mode_element_schemes.and_then(|map| map.get(attr_name));
|
||||||
|
|
||||||
|
if list_attr_schemes.is_none()
|
||||||
|
&& strict_mode_attr_schemes.is_none()
|
||||||
|
&& compat_mode_attr_schemes.is_none()
|
||||||
|
{
|
||||||
|
// We don't check schemes for this attribute.
|
||||||
|
return NodeAction::None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut allowed_schemes = list_attr_schemes
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.chain(strict_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten())
|
||||||
|
.chain(
|
||||||
|
compat_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check if the scheme is allowed.
|
||||||
|
if !allowed_schemes.any(|scheme| value.starts_with(&format!("{scheme}:"))) {
|
||||||
|
return NodeAction::Ignore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeAction::None
|
||||||
|
}
|
||||||
|
NodeData::Text(_) => NodeAction::None,
|
||||||
|
_ => NodeAction::Remove,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clean_element_attributes(&self, data: &mut ElementData) {
|
||||||
|
let ElementData { name, attrs } = data;
|
||||||
|
let element_name = name.local.as_ref();
|
||||||
|
|
||||||
|
let list_remove_attrs = self.remove_attrs.as_ref().and_then(|map| map.get(element_name));
|
||||||
|
|
||||||
|
let whitelist_attrs = self.allow_attrs.is_some() || self.use_strict();
|
||||||
|
let list_allow_attrs =
|
||||||
|
self.allow_attrs.as_ref().and_then(|list| list.content.get(element_name));
|
||||||
|
let list_is_override =
|
||||||
|
self.allow_attrs.as_ref().map(|list| list.is_override()).unwrap_or_default();
|
||||||
|
let mode_allow_attrs = (!list_is_override && self.use_strict())
|
||||||
|
.then(|| ALLOWED_ATTRIBUTES_STRICT.get(element_name))
|
||||||
|
.flatten();
|
||||||
|
|
||||||
|
let list_remove_classes =
|
||||||
|
self.remove_classes.as_ref().and_then(|map| map.get(element_name));
|
||||||
|
|
||||||
|
let whitelist_classes = self.allow_classes.is_some() || self.use_strict();
|
||||||
|
let list_allow_classes =
|
||||||
|
self.allow_classes.as_ref().and_then(|list| list.content.get(element_name));
|
||||||
|
let list_is_override =
|
||||||
|
self.allow_classes.as_ref().map(|list| list.is_override()).unwrap_or_default();
|
||||||
|
let mode_allow_classes = (!list_is_override && self.use_strict())
|
||||||
|
.then(|| ALLOWED_CLASSES_STRICT.get(element_name))
|
||||||
|
.flatten();
|
||||||
|
|
||||||
|
let actions: Vec<_> = attrs
|
||||||
|
.iter()
|
||||||
|
.filter_map(|attr| {
|
||||||
|
let value = &attr.value;
|
||||||
|
let attr_name = attr.name.local.as_ref();
|
||||||
|
|
||||||
|
// Check if the attribute should be removed.
|
||||||
|
if list_remove_attrs.is_some_and(|set| set.contains(attr_name)) {
|
||||||
|
return Some(AttributeAction::Remove(attr.to_owned()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the attribute is allowed.
|
||||||
|
if whitelist_attrs {
|
||||||
|
let list_allowed = list_allow_attrs.is_some_and(|set| set.contains(attr_name));
|
||||||
|
let mode_allowed = mode_allow_attrs.is_some_and(|set| set.contains(attr_name));
|
||||||
|
|
||||||
|
if !list_allowed && !mode_allowed {
|
||||||
|
return Some(AttributeAction::Remove(attr.to_owned()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter classes.
|
||||||
|
if attr_name == "class" {
|
||||||
|
let mut classes = value.split_whitespace().collect::<Vec<_>>();
|
||||||
|
let initial_len = classes.len();
|
||||||
|
|
||||||
|
// Process classes to remove.
|
||||||
|
if let Some(remove_classes) = list_remove_classes {
|
||||||
|
classes.retain(|class| {
|
||||||
|
for remove_class in remove_classes {
|
||||||
|
if WildMatch::new(remove_class).matches(class) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process classes to allow.
|
||||||
|
if whitelist_classes {
|
||||||
|
classes.retain(|class| {
|
||||||
|
let allow_classes = list_allow_classes
|
||||||
|
.map(|set| set.iter())
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.chain(
|
||||||
|
mode_allow_classes.map(|set| set.iter()).into_iter().flatten(),
|
||||||
|
);
|
||||||
|
|
||||||
|
for allow_class in allow_classes {
|
||||||
|
if WildMatch::new(allow_class).matches(class) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if classes.len() == initial_len {
|
||||||
|
// The list has not changed, no action necessary.
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if classes.is_empty() {
|
||||||
|
return Some(AttributeAction::Remove(attr.to_owned()));
|
||||||
|
} else {
|
||||||
|
let new_class = classes.join(" ");
|
||||||
|
return Some(AttributeAction::ReplaceValue(
|
||||||
|
attr.to_owned(),
|
||||||
|
new_class.into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for action in actions {
|
||||||
|
match action {
|
||||||
|
AttributeAction::ReplaceValue(attr, value) => {
|
||||||
|
if let Some(mut attr) = attrs.take(&attr) {
|
||||||
|
attr.value = value;
|
||||||
|
attrs.insert(attr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AttributeAction::Remove(attr) => {
|
||||||
|
attrs.remove(&attr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// What the sanitizer does with a node.
#[derive(Debug, PartialEq, Eq)]
enum NodeAction {
    /// Keep the node as it is.
    None,

    /// Drop the element but keep its children in its place.
    Ignore,

    /// Drop the element together with its children.
    Remove,
}
|
||||||
|
|
||||||
|
/// The possible actions to apply to an attribute.
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum AttributeAction {
|
||||||
|
/// Replace the value of the attribute.
|
||||||
|
ReplaceValue(Attribute, StrTendril),
|
||||||
|
|
||||||
|
/// Remove the attribute.
|
||||||
|
Remove(Attribute),
|
||||||
|
}
|
@ -1,2 +1,4 @@
|
|||||||
|
#[cfg(feature = "matrix")]
|
||||||
|
mod matrix;
|
||||||
mod navigate;
|
mod navigate;
|
||||||
mod sanitize;
|
mod sanitize;
|
||||||
|
323
crates/ruma-html/tests/it/html/matrix.rs
Normal file
323
crates/ruma-html/tests/it/html/matrix.rs
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
use assert_matches2::assert_matches;
|
||||||
|
use ruma_html::{
|
||||||
|
matrix::{AnchorUri, MatrixElement},
|
||||||
|
Html,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[test]
fn elements() {
    let raw_html = "\
        <h1>Title</h1>\
        <div class=\"text\">\
            <p>This is some <em>text</em></p>\
        </div>\
        <marquee id=\"scrolling_text\">This is scrolling</marquee>
    ";
    let html = Html::parse(raw_html);
    let mut top_level = html.children();

    // `<h1>`: converted to a heading of level 1, without leftover attributes.
    let h1 = top_level.next().unwrap();
    let h1_data = h1.as_element().unwrap().to_matrix();
    assert_matches!(h1_data.element, MatrixElement::H(heading));
    assert_eq!(heading.level, 1);
    assert!(h1_data.attrs.is_empty());

    // `<div>`: the `class` attribute is not supported on it, so it is left in `attrs`.
    let div = top_level.next().unwrap();
    let div_data = div.as_element().unwrap().to_matrix();
    assert_matches!(div_data.element, MatrixElement::Div);
    assert_eq!(div_data.attrs.len(), 1);

    // `<p>`: first child of the `<div>`.
    let p = div.first_child().unwrap();
    let p_data = p.as_element().unwrap().to_matrix();
    assert_matches!(p_data.element, MatrixElement::P);
    assert!(p_data.attrs.is_empty());

    // `<em>`: comes right after the text node that starts the `<p>`.
    let p_text = p.first_child().unwrap();
    let em = p_text.next_sibling().unwrap();
    let em_data = em.as_element().unwrap().to_matrix();
    assert_matches!(em_data.element, MatrixElement::Em);
    assert!(em_data.attrs.is_empty());

    // `<marquee>`: falls back to `Other`, and its `id` stays in `attrs`.
    let marquee = top_level.next().unwrap();
    let marquee_data = marquee.as_element().unwrap().to_matrix();
    assert_matches!(marquee_data.element, MatrixElement::Other(_));
    assert_eq!(marquee_data.attrs.len(), 1);
}
|
||||||
|
|
||||||
|
#[test]
fn span_attributes() {
    let raw_html = "\
        <span \
            data-mx-color=\"#00ff00\" \
            data-mx-bg-color=\"#ff0000\" \
            data-mx-spoiler \
            data-mx-spoiler=\"This is a spoiler\"\
        >\
            Hidden and colored\
        </span>\
    ";
    let html = Html::parse(raw_html);

    let span_node = html.children().next().unwrap();
    let converted = span_node.as_element().unwrap().to_matrix();
    assert_matches!(converted.element, MatrixElement::Span(span));

    // Both colors are parsed into their dedicated fields.
    assert_eq!(span.color.unwrap().as_ref(), "#00ff00");
    assert_eq!(span.bg_color.unwrap().as_ref(), "#ff0000");
    // Uses the first spoiler attribute, the second is dropped.
    assert!(span.spoiler.unwrap().is_empty());

    // Every attribute was recognized, so nothing is left over.
    assert!(converted.attrs.is_empty());
}
|
||||||
|
|
||||||
|
#[test]
fn a_attributes() {
    // Six consecutive `<a>` elements, each exercising a different `href` case.
    let document = Html::parse(
        "\
        <a \
            name=\"my_anchor\" \
            target=\"_blank\" \
            href=\"https://localhost/\"\
        >\
            Link with all supported attributes\
        </a>\
        <a href=\"matrix:r/somewhere:localhost\">Link with valid matrix scheme URI</a>\
        <a href=\"matrix:somewhere:localhost\">Link with invalid matrix scheme URI</a>\
        <a href=\"https://matrix.to/#/%23somewhere:example.org\">Link with valid matrix.to URI</a>\
        <a href=\"https://matrix.to/#/somewhere:example.org\">Link with invalid matrix.to URI</a>\
        <a href=\"ruma:html\">Link with unsupported scheme</a>\
        ",
    );
    let mut links = document.children();

    // 1st link: `name`, `target` and `href` are all set.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert_eq!(a.name.unwrap().as_ref(), "my_anchor");
        assert_eq!(a.target.unwrap().as_ref(), "_blank");
        assert_matches!(a.href.unwrap(), AnchorUri::Other(other_uri));
        assert_eq!(other_uri.as_ref(), "https://localhost/");
        assert!(data.attrs.is_empty());
    }

    // 2nd link: valid `matrix:` scheme URI.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert!(a.name.is_none());
        assert!(a.target.is_none());
        assert_matches!(a.href.unwrap(), AnchorUri::Matrix(matrix_uri));
        assert_eq!(matrix_uri.to_string(), "matrix:r/somewhere:localhost");
        assert!(data.attrs.is_empty());
    }

    // 3rd link: invalid `matrix:` scheme URI.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert!(a.name.is_none());
        assert!(a.target.is_none());
        assert!(a.href.is_none());
        // `href` is expected among the unsupported attributes instead.
        assert_eq!(data.attrs.len(), 1);
    }

    // 4th link: valid matrix.to URI.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert!(a.name.is_none());
        assert!(a.target.is_none());
        assert_matches!(a.href.unwrap(), AnchorUri::MatrixTo(matrix_to_uri));
        assert_eq!(matrix_to_uri.to_string(), "https://matrix.to/#/%23somewhere:example.org");
        assert!(data.attrs.is_empty());
    }

    // 5th link: invalid matrix.to URI.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert!(a.name.is_none());
        assert!(a.target.is_none());
        assert!(a.href.is_none());
        // `href` is expected among the unsupported attributes instead.
        assert_eq!(data.attrs.len(), 1);
    }

    // 6th link: URI with an unsupported scheme.
    {
        let link = links.next().unwrap();
        let data = link.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::A(a));
        assert!(a.name.is_none());
        assert!(a.target.is_none());
        assert!(a.href.is_none());
        // `href` is expected among the unsupported attributes instead.
        assert_eq!(data.attrs.len(), 1);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn img_attributes() {
    // Two `<img>` elements: one whose attributes all convert, one where
    // `width`, `height` and `src` do not.
    let document = Html::parse(
        "\
        <img \
            width=200 \
            height=200 \
            alt=\"Image with valid attributes\" \
            title=\"Image with valid attributes\" \
            src=\"mxc://localhost/abc123\" \
        />\
        <img \
            width=\"\" \
            height=\"\" \
            alt=\"Image with invalid attributes\" \
            title=\"Image with invalid attributes\" \
            src=\"https://localhost/abc123.png\" \
        />\
        ",
    );
    let mut images = document.children();

    // 1st image: every attribute is valid.
    {
        let img_node = images.next().unwrap();
        let data = img_node.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Img(img));
        assert_eq!(img.width.unwrap(), 200);
        assert_eq!(img.height.unwrap(), 200);
        assert_eq!(img.alt.unwrap().as_ref(), "Image with valid attributes");
        assert_eq!(img.title.unwrap().as_ref(), "Image with valid attributes");
        assert_eq!(img.src.unwrap(), "mxc://localhost/abc123");
        assert!(data.attrs.is_empty());
    }

    // 2nd image: empty dimensions and a non-`mxc` source.
    {
        let img_node = images.next().unwrap();
        let data = img_node.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Img(img));
        assert!(img.width.is_none());
        assert!(img.height.is_none());
        assert_eq!(img.alt.unwrap().as_ref(), "Image with invalid attributes");
        assert_eq!(img.title.unwrap().as_ref(), "Image with invalid attributes");
        assert!(img.src.is_none());
        // The attributes that could not be converted are expected among the unsupported ones.
        assert_eq!(data.attrs.len(), 3);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn ol_attributes() {
    // Two ordered lists: one with a numeric `start`, one with a textual one.
    let document = Html::parse(
        "\
        <ol start=2>\
            <li>Item in list with valid start attribute</li>\
        </ol>\
        <ol start=\"beginning\">\
            <li>Item in list with invalid start attribute</li>\
        </ol>\
        ",
    );
    let mut lists = document.children();

    // First `<ol>` element, with valid `start` attribute.
    {
        let list_node = lists.next().unwrap();
        let data = list_node.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Ol(list));
        assert_eq!(list.start.unwrap(), 2);
        assert!(data.attrs.is_empty());
    }

    // Second `<ol>` element, with invalid `start` attribute.
    {
        let list_node = lists.next().unwrap();
        let data = list_node.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Ol(list));
        assert!(list.start.is_none());
        // One attribute is expected to remain as unsupported.
        assert_eq!(data.attrs.len(), 1);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn code_attributes() {
    // Six `<code>` elements covering the different shapes of the `class` attribute.
    let document = Html::parse(
        "\
        <code class=\"language-rust\">\
            let s = \"Code with only `language-` class\";\
        </code>\
        <code class=\"rust-code\">\
            let s = \"Code with other class\";\
        </code>\
        <code class=\"language-rust rust-code\">\
            let s = \"Code with several classes beginning with `language-` class\";\
        </code>\
        <code class=\"rust-code language-rust\">\
            let s = \"Code with several classes not beginning with `language-` class\";\
        </code>\
        <code class=\"language-\">\
            let s = \"Code with invalid `language-` class\";\
        </code>\
        <code class=\"code-language-rust\">\
            let s = \"Code with other class containing `language-`\";\
        </code>\
        ",
    );
    let mut snippets = document.children();

    // 1st snippet: the only class is a `language-` class.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert_eq!(code_attrs.language.unwrap().as_ref(), "rust");
        assert!(data.attrs.is_empty());
    }

    // 2nd snippet: a single class that is not a `language-` class.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert!(code_attrs.language.is_none());
        // `class` is expected among the unsupported attributes.
        assert_eq!(data.attrs.len(), 1);
    }

    // 3rd snippet: several classes, the `language-` class comes first.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert_eq!(code_attrs.language.unwrap().as_ref(), "rust");
        // Other classes are present, so `class` is also expected among the unsupported attributes.
        assert_eq!(data.attrs.len(), 1);
    }

    // 4th snippet: several classes, the `language-` class does not come first.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert_eq!(code_attrs.language.unwrap().as_ref(), "rust");
        // Other classes are present, so `class` is also expected among the unsupported attributes.
        assert_eq!(data.attrs.len(), 1);
    }

    // 5th snippet: a `language-` class with nothing after the prefix.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert!(code_attrs.language.is_none());
        // `class` is expected among the unsupported attributes.
        assert_eq!(data.attrs.len(), 1);
    }

    // 6th snippet: a class that merely contains `language-`.
    {
        let snippet = snippets.next().unwrap();
        let data = snippet.as_element().unwrap().to_matrix();

        assert_matches!(data.element, MatrixElement::Code(code_attrs));
        assert!(code_attrs.language.is_none());
        // `class` is expected among the unsupported attributes.
        assert_eq!(data.attrs.len(), 1);
    }
}
|
@ -1,7 +1,10 @@
|
|||||||
use ruma_html::{Html, SanitizerConfig};
|
use ruma_html::{
|
||||||
|
ElementAttributesReplacement, ElementAttributesSchemes, Html, ListBehavior, NameReplacement,
|
||||||
|
PropertiesNames, SanitizerConfig,
|
||||||
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_input() {
|
fn strict_mode_valid_input() {
|
||||||
let config = SanitizerConfig::strict().remove_reply_fallback();
|
let config = SanitizerConfig::strict().remove_reply_fallback();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -11,7 +14,7 @@ fn valid_input() {
|
|||||||
<code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
|
<code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -25,7 +28,7 @@ fn valid_input() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tags_remove() {
|
fn strict_mode_elements_remove() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -41,7 +44,7 @@ fn tags_remove() {
|
|||||||
<p>But this is inside a tag</p>\
|
<p>But this is inside a tag</p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -61,7 +64,7 @@ fn tags_remove() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tags_remove_without_reply() {
|
fn strict_mode_elements_reply_remove() {
|
||||||
let config = SanitizerConfig::strict().remove_reply_fallback();
|
let config = SanitizerConfig::strict().remove_reply_fallback();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -77,7 +80,7 @@ fn tags_remove_without_reply() {
|
|||||||
<p>But this is inside a tag</p>\
|
<p>But this is inside a tag</p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -89,7 +92,7 @@ fn tags_remove_without_reply() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tags_remove_only_reply_fallback() {
|
fn remove_only_reply_fallback() {
|
||||||
let config = SanitizerConfig::new().remove_reply_fallback();
|
let config = SanitizerConfig::new().remove_reply_fallback();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -105,7 +108,7 @@ fn tags_remove_only_reply_fallback() {
|
|||||||
<p>But this is inside a tag</p>\
|
<p>But this is inside a tag</p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -117,7 +120,7 @@ fn tags_remove_only_reply_fallback() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn attrs_remove() {
|
fn strict_mode_attrs_remove() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -125,7 +128,7 @@ fn attrs_remove() {
|
|||||||
<p class=\"important\">Look at <span data-mx-color=\"#0000ff\" size=20>me!</span></p>\
|
<p class=\"important\">Look at <span data-mx-color=\"#0000ff\" size=20>me!</span></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -137,7 +140,7 @@ fn attrs_remove() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn img_remove_scheme() {
|
fn strict_mode_img_remove_scheme() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -145,20 +148,20 @@ fn img_remove_scheme() {
|
|||||||
<img src=\"https://notareal.hs/abcdef\">\
|
<img src=\"https://notareal.hs/abcdef\">\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(html.to_string(), "<p>Look at that picture:</p>");
|
assert_eq!(html.to_string(), "<p>Look at that picture:</p>");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn link_remove_scheme() {
|
fn strict_mode_link_remove_scheme() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
<p>Go see <a href=\"file://local/file.html\">my local website</a></p>\
|
<p>Go see <a href=\"file://local/file.html\">my local website</a></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -169,7 +172,7 @@ fn link_remove_scheme() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn link_compat_scheme() {
|
fn compat_mode_link_remove_scheme() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -177,7 +180,7 @@ fn link_compat_scheme() {
|
|||||||
<p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
|
<p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
"\
|
"\
|
||||||
@ -193,7 +196,7 @@ fn link_compat_scheme() {
|
|||||||
<p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
|
<p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
"\
|
"\
|
||||||
@ -204,7 +207,7 @@ fn link_compat_scheme() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn class_remove() {
|
fn strict_mode_class_remove() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
@ -214,7 +217,7 @@ fn class_remove() {
|
|||||||
<p>What do you think of the name <code class=\"fake-language-rust\">StringList</code>?</p>\
|
<p>What do you think of the name <code class=\"fake-language-rust\">StringList</code>?</p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -228,7 +231,7 @@ fn class_remove() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn depth_remove() {
|
fn strict_mode_depth_remove() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let deeply_nested_html: String = std::iter::repeat("<div>")
|
let deeply_nested_html: String = std::iter::repeat("<div>")
|
||||||
.take(100)
|
.take(100)
|
||||||
@ -240,7 +243,7 @@ fn depth_remove() {
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut html = Html::parse(&deeply_nested_html);
|
let mut html = Html::parse(&deeply_nested_html);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
let res = html.to_string();
|
let res = html.to_string();
|
||||||
assert!(res.contains("I should be fine."));
|
assert!(res.contains("I should be fine."));
|
||||||
@ -248,14 +251,14 @@ fn depth_remove() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn replace_deprecated() {
|
fn strict_mode_replace_deprecated() {
|
||||||
let config = SanitizerConfig::strict();
|
let config = SanitizerConfig::strict();
|
||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
<p>Look at <strike>you </strike><font data-mx-bg-color=\"#ff0000\" color=\"#0000ff\">me!</span></p>\
|
<p>Look at <strike>you </strike><font data-mx-bg-color=\"#ff0000\" color=\"#0000ff\">me!</span></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(&config);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
html.to_string(),
|
html.to_string(),
|
||||||
@ -264,3 +267,583 @@ fn replace_deprecated() {
|
|||||||
"
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn allow_elements() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // The `<span>` and `<code>` tags are expected to disappear while their content stays.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph with some color</p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <mx-reply>This is a fake reply</mx-reply>\
        ";

    // `SanitizerConfig::new()` with `ul`, `li`, `p` and `img` added to the allowed elements.
    let sanitizer =
        SanitizerConfig::new().allow_elements(["ul", "li", "p", "img"], ListBehavior::Add);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn override_elements() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // The `<span>` and `<code>` tags are expected to disappear while their content stays.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph with some color</p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <mx-reply>This is a fake reply</mx-reply>\
        ";

    // Strict config whose allowed elements are overridden with `ul`, `li`, `p` and `img`.
    let allowed = ["ul", "li", "p", "img"];
    let sanitizer = SanitizerConfig::strict().allow_elements(allowed, ListBehavior::Override);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn add_elements() {
    // The sanitized output is expected to be exactly the input, custom `<keep-me>` included.
    let markup = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        <keep-me>I was kept!</keep-me>\
        ";

    // Strict config with `keep-me` added to the allowed elements.
    let sanitizer = SanitizerConfig::strict().allow_elements(["keep-me"], ListBehavior::Add);
    let mut document = Html::parse(markup);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), markup);
}
|
||||||
|
|
||||||
|
#[test]
fn remove_elements() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // `<span>` and `<code>` are expected to vanish together with their content.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph </p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        ";

    // Strict config that additionally removes `span` and `code`.
    let sanitizer = SanitizerConfig::strict().remove_elements(["span", "code"]);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn ignore_elements() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // The `<span>` and `<code>` tags are expected to disappear while their content stays.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph with some color</p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <mx-reply>This is a fake reply</mx-reply>\
        ";

    // `SanitizerConfig::new()` that ignores `span` and `code`.
    let sanitizer = SanitizerConfig::new().ignore_elements(["span", "code"]);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn replace_elements() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // Only the list is expected to change: `<ul>` becomes `<ol>`.
    let expected = "\
        <ol><li>This</li><li>has</li><li>no</li><li>tag</li></ol>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // `SanitizerConfig::new()` with a single `ul` -> `ol` replacement added.
    let renames = [NameReplacement { old: "ul", new: "ol" }];
    let sanitizer = SanitizerConfig::new().replace_elements(renames, ListBehavior::Add);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn replace_elements_override() {
    // Markup handed to the sanitizer; it ends with a `<strike>` element.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        <strike>This is wrong</strike>\
        ";
    // `<ul>` is expected to become `<ol>`, and only the text of `<strike>` to remain.
    let expected = "\
        <ol><li>This</li><li>has</li><li>no</li><li>tag</li></ol>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        This is wrong\
        ";

    // Strict config whose element replacements are overridden with just `ul` -> `ol`.
    let renames = [NameReplacement { old: "ul", new: "ol" }];
    let sanitizer = SanitizerConfig::strict().replace_elements(renames, ListBehavior::Override);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn replace_elements_add() {
    // Markup handed to the sanitizer; it ends with a `<strike>` element.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        <strike>This is wrong</strike>\
        ";
    // `<ul>` is expected to become `<ol>`, and `<strike>` to become `<s>`.
    let expected = "\
        <ol><li>This</li><li>has</li><li>no</li><li>tag</li></ol>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        <s>This is wrong</s>\
        ";

    // Strict config with the `ul` -> `ol` replacement added to its existing replacements.
    let renames = [NameReplacement { old: "ul", new: "ol" }];
    let sanitizer = SanitizerConfig::strict().replace_elements(renames, ListBehavior::Add);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn allow_attributes() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // Every attribute except `src` on `<img>` is expected to be stripped.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span>with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // `SanitizerConfig::new()` with `src` on `img` added to the allowed attributes.
    let allowed = [PropertiesNames { parent: "img", properties: &["src"] }];
    let sanitizer = SanitizerConfig::new().allow_attributes(allowed, ListBehavior::Add);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn override_attributes() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // Every attribute except `src` on `<img>` is expected to be stripped.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span>with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // Strict config whose allowed attributes are overridden with only `src` on `img`.
    let allowed = [PropertiesNames { parent: "img", properties: &["src"] }];
    let sanitizer = SanitizerConfig::strict().allow_attributes(allowed, ListBehavior::Override);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn add_attributes() {
    // The sanitized output is expected to be exactly the input, `id` on `<img>` included.
    let markup = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img id=\"my_image\" src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // Strict config with `id` on `img` added to the allowed attributes.
    let allowed = [PropertiesNames { parent: "img", properties: &["id"] }];
    let sanitizer = SanitizerConfig::strict().allow_attributes(allowed, ListBehavior::Add);
    let mut document = Html::parse(markup);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), markup);
}
|
||||||
|
|
||||||
|
#[test]
fn remove_attributes() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // Only `data-mx-color` on `<span>` is expected to be stripped.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span>with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // Strict config that additionally removes `data-mx-color` on `span`.
    let removed = [PropertiesNames { parent: "span", properties: &["data-mx-color"] }];
    let sanitizer = SanitizerConfig::strict().remove_attributes(removed);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
#[test]
fn replace_attributes() {
    // Markup handed to the sanitizer.
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    // Only the `<span>` attribute name is expected to change; its value is kept.
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-bg-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    // `SanitizerConfig::new()` with `data-mx-color` -> `data-mx-bg-color` on `span` added.
    let renames = [ElementAttributesReplacement {
        element: "span",
        replacements: &[NameReplacement { old: "data-mx-color", new: "data-mx-bg-color" }],
    }];
    let sanitizer = SanitizerConfig::new().replace_attributes(renames, ListBehavior::Add);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// With `ListBehavior::Override` on the strict config, the `color` attribute of `font` comes
/// out as `data-mx-bg-color`, and the element itself is serialized as `span`.
#[test]
fn replace_attributes_override() {
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <font color=\"green\">with some color</font></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-bg-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    let sanitizer = SanitizerConfig::strict().replace_attributes(
        [ElementAttributesReplacement {
            element: "font",
            replacements: &[NameReplacement { old: "color", new: "data-mx-bg-color" }],
        }],
        ListBehavior::Override,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// With `ListBehavior::Add` on the strict config, the extra `img` rule turns `alt` into
/// `title`, and `<font color>` still comes out as `<span data-mx-color>`.
#[test]
fn replace_attributes_add() {
    let input = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <font color=\"green\">with some color</font></p>\
        <img alt=\"An image\" src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\" title=\"An image\">\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    let sanitizer = SanitizerConfig::strict().replace_attributes(
        [ElementAttributesReplacement {
            element: "img",
            replacements: &[NameReplacement { old: "alt", new: "title" }],
        }],
        ListBehavior::Add,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Allowing only the `mxc` scheme for `img[src]` on `SanitizerConfig::new()` keeps the `mxc`
/// image; the `https` image is absent from the output.
#[test]
fn allow_schemes() {
    let input = "\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <img src=\"https://notareal.hs/abcdef.png\">\
        ";
    let expected = "\
        <img src=\"mxc://notareal.hs/abcdef\">\
        ";

    let sanitizer = SanitizerConfig::new().allow_schemes(
        [ElementAttributesSchemes {
            element: "img",
            attr_schemes: &[PropertiesNames { parent: "src", properties: &["mxc"] }],
        }],
        ListBehavior::Add,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Overriding the strict config's `img[src]` schemes with `https` keeps only the `https`
/// image; the `mxc` image is absent from the output.
#[test]
fn override_schemes() {
    let input = "\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <img src=\"https://notareal.hs/abcdef.png\">\
        ";
    let expected = "\
        <img src=\"https://notareal.hs/abcdef.png\">\
        ";

    let sanitizer = SanitizerConfig::strict().allow_schemes(
        [ElementAttributesSchemes {
            element: "img",
            attr_schemes: &[PropertiesNames { parent: "src", properties: &["https"] }],
        }],
        ListBehavior::Override,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Adding `https` to the strict config's `img[src]` schemes keeps both the `mxc` and the
/// `https` image in the output.
#[test]
fn add_schemes() {
    let input = "\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <img src=\"https://notareal.hs/abcdef.png\">\
        ";
    let expected = "\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <img src=\"https://notareal.hs/abcdef.png\">\
        ";

    let sanitizer = SanitizerConfig::strict().allow_schemes(
        [ElementAttributesSchemes {
            element: "img",
            attr_schemes: &[PropertiesNames { parent: "src", properties: &["https"] }],
        }],
        ListBehavior::Add,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Denying the `http` scheme for `a[href]` on the strict config leaves the `https` link
/// intact, while the `http` link is reduced to its text content.
#[test]
fn deny_schemes() {
    let input = "\
        <a href=\"https://notareal.hs/abcdef.png\">Secure link to an image</a>\
        <a href=\"http://notareal.hs/abcdef.png\">Insecure link to an image</a>\
        ";
    let expected = "\
        <a href=\"https://notareal.hs/abcdef.png\">Secure link to an image</a>\
        Insecure link to an image\
        ";

    let sanitizer = SanitizerConfig::strict().deny_schemes([ElementAttributesSchemes {
        element: "a",
        attr_schemes: &[PropertiesNames { parent: "href", properties: &["http"] }],
    }]);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Allowing `custom-class` and `custom-class-*` on `img` from `SanitizerConfig::new()` keeps
/// the matching classes on the image; the plain `img` class and the `class` attribute of
/// `code` are gone from the output.
#[test]
fn allow_classes() {
    let input = "\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        <img class=\"custom-class custom-class-img img\" src=\"mxc://notareal.hs/abcdef\">\
        ";
    let expected = "\
        <code><mx-reply>This is a fake reply</mx-reply></code>\
        <img class=\"custom-class custom-class-img\" src=\"mxc://notareal.hs/abcdef\">\
        ";

    let sanitizer = SanitizerConfig::new().allow_classes(
        [PropertiesNames { parent: "img", properties: &["custom-class", "custom-class-*"] }],
        ListBehavior::Add,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Overriding the strict config's allowed classes for `code` keeps only the classes matching
/// `custom-class` / `custom-class-*`; `language-html` and `code` are gone from the output.
#[test]
fn override_classes() {
    let input = "\
        <code class=\"language-html custom-class custom-class-code code\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    let expected = "\
        <code class=\"custom-class custom-class-code\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    let sanitizer = SanitizerConfig::strict().allow_classes(
        [PropertiesNames { parent: "code", properties: &["custom-class", "custom-class-*"] }],
        ListBehavior::Override,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Adding `custom-class` / `custom-class-*` to the strict config's allowed classes for `code`
/// keeps them alongside `language-html`; the plain `code` class is gone from the output.
#[test]
fn add_classes() {
    let input = "\
        <code class=\"language-html custom-class custom-class-code code\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    let expected = "\
        <code class=\"language-html custom-class custom-class-code\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    let sanitizer = SanitizerConfig::strict().allow_classes(
        [PropertiesNames { parent: "code", properties: &["custom-class", "custom-class-*"] }],
        ListBehavior::Add,
    );
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
|
||||||
|
/// Removing `language-rust` from `code` on top of the strict config leaves only
/// `language-html` in the element's `class` attribute.
#[test]
fn remove_classes() {
    let input = "\
        <code class=\"language-html language-rust\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";
    let expected = "\
        <code class=\"language-html\"><mx-reply>This is a fake reply</mx-reply></code>\
        ";

    let sanitizer = SanitizerConfig::strict()
        .remove_classes([PropertiesNames { parent: "code", properties: &["language-rust"] }]);
    let mut document = Html::parse(input);
    document.sanitize_with(&sanitizer);

    assert_eq!(document.to_string(), expected);
}
|
||||||
|
@ -5,6 +5,8 @@ Bug fixes:
|
|||||||
* Disallow `invite` -> `knock` membership transition
|
* Disallow `invite` -> `knock` membership transition
|
||||||
* The spec was determined to be right about rejecting it in the first place:
|
* The spec was determined to be right about rejecting it in the first place:
|
||||||
<https://github.com/matrix-org/matrix-spec/pull/1717>
|
<https://github.com/matrix-org/matrix-spec/pull/1717>
|
||||||
|
* Perform extra redaction checks on room versions 1 and 2, rather than for
|
||||||
|
version 3 and onwards
|
||||||
|
|
||||||
# 0.10.0
|
# 0.10.0
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ impl RoomVersion {
|
|||||||
special_case_aliases_auth: true,
|
special_case_aliases_auth: true,
|
||||||
strict_canonicaljson: false,
|
strict_canonicaljson: false,
|
||||||
limit_notifications_power_levels: false,
|
limit_notifications_power_levels: false,
|
||||||
extra_redaction_checks: false,
|
extra_redaction_checks: true,
|
||||||
allow_knocking: false,
|
allow_knocking: false,
|
||||||
restricted_join_rules: false,
|
restricted_join_rules: false,
|
||||||
knock_restricted_join_rule: false,
|
knock_restricted_join_rule: false,
|
||||||
@ -101,7 +101,7 @@ impl RoomVersion {
|
|||||||
pub const V2: Self = Self { state_res: StateResolutionVersion::V2, ..Self::V1 };
|
pub const V2: Self = Self { state_res: StateResolutionVersion::V2, ..Self::V1 };
|
||||||
|
|
||||||
pub const V3: Self =
|
pub const V3: Self =
|
||||||
Self { event_format: EventFormatVersion::V2, extra_redaction_checks: true, ..Self::V2 };
|
Self { event_format: EventFormatVersion::V2, extra_redaction_checks: false, ..Self::V2 };
|
||||||
|
|
||||||
pub const V4: Self = Self { event_format: EventFormatVersion::V3, ..Self::V3 };
|
pub const V4: Self = Self { event_format: EventFormatVersion::V3, ..Self::V3 };
|
||||||
|
|
||||||
|
@ -120,6 +120,7 @@ js = ["ruma-common/js"]
|
|||||||
rand = ["ruma-common/rand"]
|
rand = ["ruma-common/rand"]
|
||||||
markdown = ["ruma-events?/markdown"]
|
markdown = ["ruma-events?/markdown"]
|
||||||
html = ["dep:ruma-html", "ruma-events?/html"]
|
html = ["dep:ruma-html", "ruma-events?/html"]
|
||||||
|
html-matrix = ["html", "ruma-html/matrix"]
|
||||||
|
|
||||||
# Everything except compat, js and unstable features
|
# Everything except compat, js and unstable features
|
||||||
full = [
|
full = [
|
||||||
@ -137,6 +138,7 @@ full = [
|
|||||||
"rand",
|
"rand",
|
||||||
"markdown",
|
"markdown",
|
||||||
"html",
|
"html",
|
||||||
|
"html-matrix",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Enable all compatibility hacks. Deprecated.
|
# Enable all compatibility hacks. Deprecated.
|
||||||
|
@ -38,9 +38,11 @@
|
|||||||
//!
|
//!
|
||||||
//! These features are only useful if you want to use a method that requires it:
|
//! These features are only useful if you want to use a method that requires it:
|
||||||
//!
|
//!
|
||||||
//! * `rand`
|
//! * `rand` -- Generate random identifiers.
|
||||||
//! * `markdown`
|
//! * `markdown` -- Parse markdown to construct messages.
|
||||||
//! * `html`
|
//! * `html` -- Parse HTML to sanitize it or navigate its tree.
|
||||||
|
//! * `html-matrix` -- Enable the `matrix` feature of `ruma-html` to convert HTML element data to
|
||||||
|
//! typed data as suggested by the Matrix Specification.
|
||||||
//!
|
//!
|
||||||
//! # Unstable features
|
//! # Unstable features
|
||||||
//!
|
//!
|
||||||
|
Loading…
x
Reference in New Issue
Block a user