events: Move sanitize HTML features to new ruma-html crate
This commit is contained in:
committed by
Kévin Commaille
parent
acfeb38e90
commit
24ce9d5e09
3
crates/ruma-html/CHANGELOG.md
Normal file
3
crates/ruma-html/CHANGELOG.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# 0.1.0 (unreleased)
|
||||
|
||||
Initial release
|
||||
21
crates/ruma-html/Cargo.toml
Normal file
21
crates/ruma-html/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "ruma-html"
|
||||
version = "0.1.0"
|
||||
description = "Opinionated HTML parsing and manipulating."
|
||||
homepage = "https://www.ruma.io/"
|
||||
keywords = ["matrix", "chat", "messaging", "ruma", "html", "parser"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/ruma/ruma"
|
||||
edition = "2021"
|
||||
rust-version = { workspace = true }
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
rustdoc-args = ["--cfg", "docsrs"]
|
||||
|
||||
[dependencies]
|
||||
html5ever = "0.26.0"
|
||||
phf = { version = "0.11.1", features = ["macros"] }
|
||||
tracing = { workspace = true, features = ["attributes"] }
|
||||
wildmatch = "2.0.0"
|
||||
12
crates/ruma-html/README.md
Normal file
12
crates/ruma-html/README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# ruma-html
|
||||
|
||||
[crates.io page](https://crates.io/crates/ruma-html)
|
||||
[docs.rs page](https://docs.rs/ruma-html/)
|
||||

|
||||
|
||||
Opinionated HTML parsing and manipulating library.
|
||||
|
||||
Like the rest of the Ruma crates, this crate is primarily meant to be used for
|
||||
the Matrix protocol. It should be able to be used to interact with any HTML
|
||||
content but will offer APIs focused on specificities of HTML in the Matrix
|
||||
specification.
|
||||
399
crates/ruma-html/src/html_fragment.rs
Normal file
399
crates/ruma-html/src/html_fragment.rs
Normal file
@@ -0,0 +1,399 @@
|
||||
use std::{collections::BTreeSet, fmt, io};
|
||||
|
||||
use html5ever::{
|
||||
local_name, namespace_url, ns, parse_fragment,
|
||||
serialize::{serialize, Serialize, SerializeOpts, Serializer, TraversalScope},
|
||||
tendril::{StrTendril, TendrilSink},
|
||||
tree_builder::{NodeOrText, TreeSink},
|
||||
Attribute, ParseOpts, QualName,
|
||||
};
|
||||
use tracing::debug;
|
||||
|
||||
/// An HTML fragment.
|
||||
///
|
||||
/// To get the serialized HTML, use its `Display` implementation.
|
||||
#[derive(Debug)]
|
||||
pub struct Fragment {
|
||||
pub(crate) nodes: Vec<Node>,
|
||||
}
|
||||
|
||||
impl Fragment {
|
||||
/// Construct a new `Fragment` by parsing the given HTML.
|
||||
pub fn parse_html(html: &str) -> Self {
|
||||
let sink = Self::default();
|
||||
let mut parser = parse_fragment(
|
||||
sink,
|
||||
ParseOpts::default(),
|
||||
QualName::new(None, ns!(html), local_name!("div")),
|
||||
Vec::new(),
|
||||
);
|
||||
parser.process(html.into());
|
||||
parser.finish()
|
||||
}
|
||||
|
||||
/// Construct a new `Node` with the given data and add it to this `Fragment`.
|
||||
///
|
||||
/// Returns the index of the new node.
|
||||
pub fn new_node(&mut self, data: NodeData) -> usize {
|
||||
self.nodes.push(Node::new(data));
|
||||
self.nodes.len() - 1
|
||||
}
|
||||
|
||||
/// Append the given node to the given parent in this `Fragment`.
|
||||
///
|
||||
/// The node is detached from its previous position.
|
||||
pub fn append_node(&mut self, parent_id: usize, node_id: usize) {
|
||||
self.detach(node_id);
|
||||
|
||||
self.nodes[node_id].parent = Some(parent_id);
|
||||
if let Some(last_child) = self.nodes[parent_id].last_child.take() {
|
||||
self.nodes[node_id].prev_sibling = Some(last_child);
|
||||
self.nodes[last_child].next_sibling = Some(node_id);
|
||||
} else {
|
||||
self.nodes[parent_id].first_child = Some(node_id);
|
||||
}
|
||||
self.nodes[parent_id].last_child = Some(node_id);
|
||||
}
|
||||
|
||||
/// Insert the given node before the given sibling in this `Fragment`.
|
||||
///
|
||||
/// The node is detached from its previous position.
|
||||
pub fn insert_before(&mut self, sibling_id: usize, node_id: usize) {
|
||||
self.detach(node_id);
|
||||
|
||||
self.nodes[node_id].parent = self.nodes[sibling_id].parent;
|
||||
self.nodes[node_id].next_sibling = Some(sibling_id);
|
||||
if let Some(prev_sibling) = self.nodes[sibling_id].prev_sibling.take() {
|
||||
self.nodes[node_id].prev_sibling = Some(prev_sibling);
|
||||
self.nodes[prev_sibling].next_sibling = Some(node_id);
|
||||
} else if let Some(parent) = self.nodes[sibling_id].parent {
|
||||
self.nodes[parent].first_child = Some(node_id);
|
||||
}
|
||||
self.nodes[sibling_id].prev_sibling = Some(node_id);
|
||||
}
|
||||
|
||||
/// Detach the given node from this `Fragment`.
|
||||
pub fn detach(&mut self, node_id: usize) {
|
||||
let (parent, prev_sibling, next_sibling) = {
|
||||
let node = &mut self.nodes[node_id];
|
||||
(node.parent.take(), node.prev_sibling.take(), node.next_sibling.take())
|
||||
};
|
||||
|
||||
if let Some(next_sibling) = next_sibling {
|
||||
self.nodes[next_sibling].prev_sibling = prev_sibling;
|
||||
} else if let Some(parent) = parent {
|
||||
self.nodes[parent].last_child = prev_sibling;
|
||||
}
|
||||
|
||||
if let Some(prev_sibling) = prev_sibling {
|
||||
self.nodes[prev_sibling].next_sibling = next_sibling;
|
||||
} else if let Some(parent) = parent {
|
||||
self.nodes[parent].first_child = next_sibling;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Fragment {
|
||||
fn default() -> Self {
|
||||
Self { nodes: vec![Node::new(NodeData::Document)] }
|
||||
}
|
||||
}
|
||||
|
||||
impl TreeSink for Fragment {
|
||||
type Handle = usize;
|
||||
type Output = Self;
|
||||
|
||||
fn finish(self) -> Self::Output {
|
||||
self
|
||||
}
|
||||
|
||||
fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
|
||||
debug!("HTML parse error: {msg}");
|
||||
}
|
||||
|
||||
fn get_document(&mut self) -> Self::Handle {
|
||||
0
|
||||
}
|
||||
|
||||
fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> html5ever::ExpandedName<'a> {
|
||||
self.nodes[*target].as_element().expect("not an element").name.expanded()
|
||||
}
|
||||
|
||||
fn create_element(
|
||||
&mut self,
|
||||
name: QualName,
|
||||
attrs: Vec<Attribute>,
|
||||
_flags: html5ever::tree_builder::ElementFlags,
|
||||
) -> Self::Handle {
|
||||
self.new_node(NodeData::Element(ElementData { name, attrs: attrs.into_iter().collect() }))
|
||||
}
|
||||
|
||||
fn create_comment(&mut self, _text: StrTendril) -> Self::Handle {
|
||||
self.new_node(NodeData::Other)
|
||||
}
|
||||
|
||||
fn create_pi(&mut self, _target: StrTendril, _data: StrTendril) -> Self::Handle {
|
||||
self.new_node(NodeData::Other)
|
||||
}
|
||||
|
||||
fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
|
||||
match child {
|
||||
NodeOrText::AppendNode(index) => self.append_node(*parent, index),
|
||||
NodeOrText::AppendText(text) => {
|
||||
// If the previous sibling is also text, add this text to it.
|
||||
if let Some(sibling) =
|
||||
self.nodes[*parent].last_child.and_then(|child| self.nodes[child].as_text_mut())
|
||||
{
|
||||
sibling.push_tendril(&text);
|
||||
} else {
|
||||
let index = self.new_node(NodeData::Text(text));
|
||||
self.append_node(*parent, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
element: &Self::Handle,
|
||||
prev_element: &Self::Handle,
|
||||
child: NodeOrText<Self::Handle>,
|
||||
) {
|
||||
if self.nodes[*element].parent.is_some() {
|
||||
self.append_before_sibling(element, child);
|
||||
} else {
|
||||
self.append(prev_element, child);
|
||||
}
|
||||
}
|
||||
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
_name: StrTendril,
|
||||
_public_id: StrTendril,
|
||||
_system_id: StrTendril,
|
||||
) {
|
||||
}
|
||||
|
||||
fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
|
||||
*target
|
||||
}
|
||||
|
||||
fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
|
||||
x == y
|
||||
}
|
||||
|
||||
fn set_quirks_mode(&mut self, _mode: html5ever::tree_builder::QuirksMode) {}
|
||||
|
||||
fn append_before_sibling(
|
||||
&mut self,
|
||||
sibling: &Self::Handle,
|
||||
new_node: NodeOrText<Self::Handle>,
|
||||
) {
|
||||
match new_node {
|
||||
NodeOrText::AppendNode(index) => self.insert_before(*sibling, index),
|
||||
NodeOrText::AppendText(text) => {
|
||||
// If the previous sibling is also text, add this text to it.
|
||||
if let Some(prev_text) = self.nodes[*sibling]
|
||||
.prev_sibling
|
||||
.and_then(|prev| self.nodes[prev].as_text_mut())
|
||||
{
|
||||
prev_text.push_tendril(&text);
|
||||
} else {
|
||||
let index = self.new_node(NodeData::Text(text));
|
||||
self.insert_before(*sibling, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) {
|
||||
let target = self.nodes[*target].as_element_mut().unwrap();
|
||||
target.attrs.extend(attrs);
|
||||
}
|
||||
|
||||
fn remove_from_parent(&mut self, target: &Self::Handle) {
|
||||
self.detach(*target);
|
||||
}
|
||||
|
||||
fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
|
||||
let mut next_child = self.nodes[*node].first_child;
|
||||
while let Some(child) = next_child {
|
||||
next_child = self.nodes[child].next_sibling;
|
||||
self.append_node(*new_parent, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Fragment {
|
||||
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
match traversal_scope {
|
||||
TraversalScope::IncludeNode => {
|
||||
let root = self.nodes[0].first_child.unwrap();
|
||||
|
||||
let mut next_child = self.nodes[root].first_child;
|
||||
while let Some(child) = next_child {
|
||||
let child = &self.nodes[child];
|
||||
child.serialize(self, serializer)?;
|
||||
next_child = child.next_sibling;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
TraversalScope::ChildrenOnly(_) => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Fragment {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut u8_vec = Vec::new();
|
||||
serialize(
|
||||
&mut u8_vec,
|
||||
self,
|
||||
SerializeOpts { traversal_scope: TraversalScope::IncludeNode, ..Default::default() },
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
f.write_str(&String::from_utf8(u8_vec).unwrap())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// An HTML node.
|
||||
#[derive(Debug)]
|
||||
#[non_exhaustive]
|
||||
pub struct Node {
|
||||
pub(crate) parent: Option<usize>,
|
||||
pub(crate) prev_sibling: Option<usize>,
|
||||
pub(crate) next_sibling: Option<usize>,
|
||||
pub(crate) first_child: Option<usize>,
|
||||
pub(crate) last_child: Option<usize>,
|
||||
pub(crate) data: NodeData,
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Constructs a new `Node` with the given data.
|
||||
pub fn new(data: NodeData) -> Self {
|
||||
Self {
|
||||
parent: None,
|
||||
prev_sibling: None,
|
||||
next_sibling: None,
|
||||
first_child: None,
|
||||
last_child: None,
|
||||
data,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `ElementData` of this `Node` if it is a `NodeData::Element`.
|
||||
pub fn as_element(&self) -> Option<&ElementData> {
|
||||
match &self.data {
|
||||
NodeData::Element(data) => Some(data),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the mutable `ElementData` of this `Node` if it is a `NodeData::Element`.
|
||||
pub fn as_element_mut(&mut self) -> Option<&mut ElementData> {
|
||||
match &mut self.data {
|
||||
NodeData::Element(data) => Some(data),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the mutable text content of this `Node`, if it is a `NodeData::Text`.
|
||||
pub fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
|
||||
match &mut self.data {
|
||||
NodeData::Text(data) => Some(data),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Node {
|
||||
pub(crate) fn serialize<S>(&self, fragment: &Fragment, serializer: &mut S) -> io::Result<()>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
match &self.data {
|
||||
NodeData::Element(data) => {
|
||||
serializer.start_elem(
|
||||
data.name.clone(),
|
||||
data.attrs.iter().map(|attr| (&attr.name, &*attr.value)),
|
||||
)?;
|
||||
|
||||
let mut next_child = self.first_child;
|
||||
while let Some(child) = next_child {
|
||||
let child = &fragment.nodes[child];
|
||||
child.serialize(fragment, serializer)?;
|
||||
next_child = child.next_sibling;
|
||||
}
|
||||
|
||||
serializer.end_elem(data.name.clone())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
NodeData::Document => {
|
||||
let mut next_child = self.first_child;
|
||||
while let Some(child) = next_child {
|
||||
let child = &fragment.nodes[child];
|
||||
child.serialize(fragment, serializer)?;
|
||||
next_child = child.next_sibling;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
NodeData::Text(text) => serializer.write_text(text),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The data of a `Node`.
|
||||
#[derive(Debug)]
|
||||
#[allow(clippy::exhaustive_enums)]
|
||||
pub enum NodeData {
|
||||
/// The root node of the `Fragment`.
|
||||
Document,
|
||||
|
||||
/// A text node.
|
||||
Text(StrTendril),
|
||||
|
||||
/// An HTML element (aka a tag).
|
||||
Element(ElementData),
|
||||
|
||||
/// Other types (comment, processing instruction, …).
|
||||
Other,
|
||||
}
|
||||
|
||||
/// The data of an HTML element.
|
||||
#[derive(Debug)]
|
||||
#[allow(clippy::exhaustive_structs)]
|
||||
pub struct ElementData {
|
||||
/// The qualified name of the element.
|
||||
pub name: QualName,
|
||||
|
||||
/// The attributes of the element.
|
||||
pub attrs: BTreeSet<Attribute>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Fragment;

    /// Parsing then serializing well-formed HTML must give back the input.
    #[test]
    fn sanity() {
        let round_trip = |input: &str| Fragment::parse_html(input).to_string();

        let html = "\
            <h1>Title</h1>\
            <div>\
                <p>This is some <em>text</em></p>\
            </div>\
        ";
        assert_eq!(round_trip(html), html);

        assert_eq!(round_trip(""), "");
    }
}
|
||||
19
crates/ruma-html/src/lib.rs
Normal file
19
crates/ruma-html/src/lib.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
#![doc(html_favicon_url = "https://www.ruma.io/favicon.ico")]
|
||||
#![doc(html_logo_url = "https://www.ruma.io/images/logo.png")]
|
||||
//! Opinionated HTML parsing and manipulating library.
|
||||
//!
|
||||
//! Like the rest of the Ruma crates, this crate is primarily meant to be used for
|
||||
//! the Matrix protocol. It should be able to be used to interact with any HTML
|
||||
//! document but will offer APIs focused on specificities of HTML in the Matrix
|
||||
//! specification.
|
||||
|
||||
#![warn(missing_docs)]
|
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
|
||||
|
||||
mod html_fragment;
|
||||
mod sanitize;
|
||||
|
||||
pub use self::{
|
||||
html_fragment::{ElementData, Fragment, Node, NodeData},
|
||||
sanitize::*,
|
||||
};
|
||||
157
crates/ruma-html/src/sanitize.rs
Normal file
157
crates/ruma-html/src/sanitize.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
//! Convenience methods and types to sanitize HTML messages.
|
||||
|
||||
mod html_sanitizer;
|
||||
|
||||
pub use self::html_sanitizer::HtmlSanitizer;
|
||||
|
||||
/// Sanitize the given HTML string.
|
||||
///
|
||||
/// This removes the [tags and attributes] that are not listed in the Matrix specification.
|
||||
///
|
||||
/// It can also optionally remove the [rich reply fallback].
|
||||
///
|
||||
/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||
pub fn sanitize_html(
|
||||
s: &str,
|
||||
mode: HtmlSanitizerMode,
|
||||
remove_reply_fallback: RemoveReplyFallback,
|
||||
) -> String {
|
||||
let sanitizer = HtmlSanitizer::new(mode, remove_reply_fallback);
|
||||
sanitizer.clean(s).to_string()
|
||||
}
|
||||
|
||||
/// Which HTML [tags and attributes] the sanitizer keeps.
///
/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)]
pub enum HtmlSanitizerMode {
    /// Only the tags and attributes listed in the Matrix specification are kept.
    Strict,

    /// Same as `Strict`, plus tags and attributes that are not in the spec yet but are
    /// reasonable to keep.
    Compat,
}
|
||||
|
||||
/// Whether the sanitizer should also remove the [rich reply fallback].
///
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)]
pub enum RemoveReplyFallback {
    /// The rich reply fallback is removed.
    Yes,

    /// The rich reply fallback is kept.
    No,
}
|
||||
|
||||
/// Remove the [rich reply fallback] of the given HTML string.
|
||||
///
|
||||
/// Due to the fact that the HTML is parsed, note that malformed HTML and comments will be stripped
|
||||
/// from the output.
|
||||
///
|
||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||
pub fn remove_html_reply_fallback(s: &str) -> String {
|
||||
let sanitizer = HtmlSanitizer::reply_fallback_remover();
|
||||
sanitizer.clean(s).to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{
        remove_html_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback,
    };

    /// The rich reply fallback that starts the input of every test.
    const REPLY_FALLBACK: &str = "\
        <mx-reply>\
            <blockquote>\
                <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
                <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
                <br>\
                Previous message\
            </blockquote>\
        </mx-reply>\
    ";

    /// A message with a tag that is not allowed, without reply fallback.
    const MESSAGE_WITH_REMOVED_TAG: &str = "\
        <removed>This has no tag</removed>\
        <p>But this is inside a tag</p>\
    ";

    /// `MESSAGE_WITH_REMOVED_TAG` once sanitized.
    const SANITIZED_MESSAGE: &str = "\
        This has no tag\
        <p>But this is inside a tag</p>\
    ";

    #[test]
    fn sanitize() {
        let input = [REPLY_FALLBACK, MESSAGE_WITH_REMOVED_TAG].concat();
        let expected = [REPLY_FALLBACK, SANITIZED_MESSAGE].concat();

        let sanitized = sanitize_html(&input, HtmlSanitizerMode::Strict, RemoveReplyFallback::No);

        assert_eq!(sanitized, expected);
    }

    #[test]
    fn sanitize_without_reply() {
        let input = [REPLY_FALLBACK, MESSAGE_WITH_REMOVED_TAG].concat();

        let sanitized = sanitize_html(&input, HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes);

        assert_eq!(sanitized, SANITIZED_MESSAGE);
    }

    #[test]
    fn remove_html_reply() {
        let message = "\
            <keep-me>This keeps its tag</keep-me>\
            <p>But this is inside a tag</p>\
        ";
        let input = [REPLY_FALLBACK, message].concat();

        let without_reply = remove_html_reply_fallback(&input);

        assert_eq!(without_reply, message);
    }
}
|
||||
540
crates/ruma-html/src/sanitize/html_sanitizer.rs
Normal file
540
crates/ruma-html/src/sanitize/html_sanitizer.rs
Normal file
@@ -0,0 +1,540 @@
|
||||
use html5ever::{tendril::StrTendril, Attribute};
|
||||
use phf::{phf_map, phf_set, Map, Set};
|
||||
use wildmatch::WildMatch;
|
||||
|
||||
use super::{HtmlSanitizerMode, RemoveReplyFallback};
|
||||
use crate::{ElementData, Fragment, NodeData};
|
||||
|
||||
/// A sanitizer to filter [HTML tags and attributes] according to the Matrix specification.
|
||||
///
|
||||
/// [HTML tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HtmlSanitizer {
|
||||
/// The mode of the HTML sanitizer.
|
||||
mode: HtmlSanitizerMode,
|
||||
|
||||
/// Whether to filter HTML tags and attributes.
|
||||
///
|
||||
/// If this is `true`, tags and attributes that do not match the lists will be removed, but
|
||||
/// the tags' children will still be present in the output.
|
||||
///
|
||||
/// If this is `false`, all the tags and attributes are allowed.
|
||||
filter_tags_attributes: bool,
|
||||
|
||||
/// Whether to remove replies.
|
||||
///
|
||||
/// If this is `true`, the rich reply fallback will be removed.
|
||||
///
|
||||
/// If this is `false`, the rich reply tag will be allowed.
|
||||
remove_replies: bool,
|
||||
}
|
||||
|
||||
impl HtmlSanitizer {
|
||||
/// Constructs a `HTMLSanitizer` that will filter the tags and attributes according to the given
|
||||
/// mode.
|
||||
///
|
||||
/// It can also optionally remove the [rich reply fallback].
|
||||
///
|
||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||
pub fn new(mode: HtmlSanitizerMode, remove_reply_fallback: RemoveReplyFallback) -> Self {
|
||||
Self {
|
||||
mode,
|
||||
filter_tags_attributes: true,
|
||||
remove_replies: remove_reply_fallback == RemoveReplyFallback::Yes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a `HTMLSanitizer` instance that only removes the [rich reply fallback].
|
||||
///
|
||||
/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies
|
||||
pub fn reply_fallback_remover() -> Self {
|
||||
Self {
|
||||
mode: HtmlSanitizerMode::Strict,
|
||||
filter_tags_attributes: false,
|
||||
remove_replies: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Clean the given HTML string with this sanitizer.
|
||||
pub fn clean(&self, html: &str) -> Fragment {
|
||||
let mut fragment = Fragment::parse_html(html);
|
||||
|
||||
let root = fragment.nodes[0].first_child.unwrap();
|
||||
let mut next_child = fragment.nodes[root].first_child;
|
||||
while let Some(child) = next_child {
|
||||
next_child = fragment.nodes[child].next_sibling;
|
||||
self.clean_node(&mut fragment, child, 0);
|
||||
}
|
||||
|
||||
fragment
|
||||
}
|
||||
|
||||
fn clean_node(&self, fragment: &mut Fragment, node_id: usize, depth: u32) {
|
||||
let action = self.node_action(fragment, node_id, depth);
|
||||
|
||||
if action != NodeAction::Remove {
|
||||
let mut next_child = fragment.nodes[node_id].first_child;
|
||||
while let Some(child) = next_child {
|
||||
next_child = fragment.nodes[child].next_sibling;
|
||||
|
||||
if action == NodeAction::Ignore {
|
||||
fragment.insert_before(node_id, child);
|
||||
}
|
||||
|
||||
self.clean_node(fragment, child, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if matches!(action, NodeAction::Ignore | NodeAction::Remove) {
|
||||
fragment.detach(node_id);
|
||||
} else if self.filter_tags_attributes {
|
||||
if let Some(data) = fragment.nodes[node_id].as_element_mut() {
|
||||
self.clean_element_attributes(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn node_action(&self, fragment: &Fragment, node_id: usize, depth: u32) -> NodeAction {
|
||||
match &fragment.nodes[node_id].data {
|
||||
NodeData::Element(ElementData { name, attrs, .. }) => {
|
||||
let tag: &str = &name.local;
|
||||
|
||||
if (self.remove_replies && tag == RICH_REPLY_TAG)
|
||||
|| (self.filter_tags_attributes && depth >= MAX_DEPTH_STRICT)
|
||||
{
|
||||
NodeAction::Remove
|
||||
} else if self.filter_tags_attributes
|
||||
&& (!ALLOWED_TAGS_WITHOUT_REPLY_STRICT.contains(tag) && tag != RICH_REPLY_TAG)
|
||||
{
|
||||
NodeAction::Ignore
|
||||
} else if self.filter_tags_attributes {
|
||||
let allowed_schemes = if self.mode == HtmlSanitizerMode::Strict {
|
||||
&ALLOWED_SCHEMES_STRICT
|
||||
} else {
|
||||
&ALLOWED_SCHEMES_COMPAT
|
||||
};
|
||||
for attr in attrs.iter() {
|
||||
let value = &attr.value;
|
||||
let attr: &str = &attr.name.local;
|
||||
|
||||
// Check if there is a (tag, attr) tuple entry.
|
||||
if let Some(schemes) = allowed_schemes.get(&*format!("{tag}:{attr}")) {
|
||||
// Check if the scheme is allowed.
|
||||
if !schemes
|
||||
.iter()
|
||||
.any(|scheme| value.starts_with(&format!("{scheme}:")))
|
||||
{
|
||||
return NodeAction::Ignore;
|
||||
}
|
||||
}
|
||||
}
|
||||
NodeAction::None
|
||||
} else {
|
||||
NodeAction::None
|
||||
}
|
||||
}
|
||||
NodeData::Text(_) => NodeAction::None,
|
||||
_ => NodeAction::Remove,
|
||||
}
|
||||
}
|
||||
|
||||
fn clean_element_attributes(&self, data: &mut ElementData) {
|
||||
let ElementData { name, attrs } = data;
|
||||
let tag: &str = &name.local;
|
||||
|
||||
let actions: Vec<_> = attrs
|
||||
.iter()
|
||||
.filter_map(|attr| {
|
||||
let value = &attr.value;
|
||||
let name: &str = &attr.name.local;
|
||||
|
||||
if ALLOWED_ATTRIBUTES_STRICT.get(tag).filter(|attrs| attrs.contains(name)).is_none()
|
||||
{
|
||||
return Some(AttributeAction::Remove(attr.to_owned()));
|
||||
}
|
||||
|
||||
if name == "class" {
|
||||
if let Some(classes) = ALLOWED_CLASSES_STRICT.get(tag) {
|
||||
let mut changed = false;
|
||||
let attr_classes = value.split_whitespace().filter(|attr_class| {
|
||||
for class in classes.iter() {
|
||||
if WildMatch::new(class).matches(attr_class) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
changed = true;
|
||||
false
|
||||
});
|
||||
|
||||
let folded_classes = attr_classes.fold(String::new(), |mut a, b| {
|
||||
a.reserve(b.len() + 1);
|
||||
a.push_str(b);
|
||||
a.push('\n');
|
||||
a
|
||||
});
|
||||
let final_classes = folded_classes.trim_end();
|
||||
|
||||
if changed {
|
||||
if final_classes.is_empty() {
|
||||
return Some(AttributeAction::Remove(attr.to_owned()));
|
||||
} else {
|
||||
return Some(AttributeAction::ReplaceValue(
|
||||
attr.to_owned(),
|
||||
final_classes.to_owned().into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
|
||||
for action in actions {
|
||||
match action {
|
||||
AttributeAction::ReplaceValue(attr, value) => {
|
||||
if let Some(mut attr) = attrs.take(&attr) {
|
||||
attr.value = value;
|
||||
attrs.insert(attr);
|
||||
}
|
||||
}
|
||||
AttributeAction::Remove(attr) => {
|
||||
attrs.remove(&attr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// What the sanitizer does with a node.
#[derive(Debug, Eq, PartialEq)]
enum NodeAction {
    /// Keep the node as it is.
    None,

    /// Drop the node itself, its children take its place.
    Ignore,

    /// Drop the node together with its children.
    Remove,
}
|
||||
|
||||
/// The possible actions to apply to an element node.
|
||||
#[derive(Debug)]
|
||||
enum AttributeAction {
|
||||
/// Replace the value of the attribute.
|
||||
ReplaceValue(Attribute, StrTendril),
|
||||
|
||||
/// Remove the element and its children.
|
||||
Remove(Attribute),
|
||||
}
|
||||
|
||||
/// List of HTML tags allowed in the Matrix specification, without the rich reply fallback tag.
|
||||
static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
|
||||
"font", "del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
|
||||
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "strike",
|
||||
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
|
||||
"caption", "pre", "span", "img", "details", "summary",
|
||||
};
|
||||
|
||||
/// The HTML tag name for a rich reply fallback.
|
||||
const RICH_REPLY_TAG: &str = "mx-reply";
|
||||
|
||||
/// Allowed attributes per HTML tag according to the Matrix specification.
|
||||
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||
"font" => &ALLOWED_ATTRIBUTES_FONT_STRICT,
|
||||
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
|
||||
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
|
||||
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
|
||||
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
|
||||
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
|
||||
};
|
||||
static ALLOWED_ATTRIBUTES_FONT_STRICT: Set<&str> =
|
||||
phf_set! { "data-mx-bg-color", "data-mx-color", "color" };
|
||||
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
|
||||
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
|
||||
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
|
||||
static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
|
||||
phf_set! { "width", "height", "alt", "title", "src" };
|
||||
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
|
||||
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
|
||||
|
||||
/// Allowed schemes of URIs per HTML tag and attribute tuple according to the Matrix specification.
|
||||
static ALLOWED_SCHEMES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||
"a:href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
|
||||
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
|
||||
};
|
||||
static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
|
||||
phf_set! { "http", "https", "ftp", "mailto", "magnet" };
|
||||
static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
|
||||
|
||||
/// Extra allowed schemes of URIs per HTML tag and attribute tuple.
|
||||
///
|
||||
/// This is a convenience list to add schemes that can be encountered but are not listed in the
|
||||
/// Matrix specification. It consists of:
|
||||
///
|
||||
/// * The `matrix` scheme for `a` tags (see [matrix-org/matrix-spec#1108]).
|
||||
///
|
||||
/// To get a complete list, add these to `ALLOWED_SCHEMES_STRICT`.
|
||||
///
|
||||
/// [matrix-org/matrix-spec#1108]: https://github.com/matrix-org/matrix-spec/issues/1108
|
||||
static ALLOWED_SCHEMES_COMPAT: Map<&str, &Set<&str>> = phf_map! {
|
||||
"a:href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
|
||||
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
|
||||
};
|
||||
static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> =
|
||||
phf_set! { "http", "https", "ftp", "mailto", "magnet", "matrix" };
|
||||
|
||||
/// Allowed classes per HTML tag according to the Matrix specification.
|
||||
static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
|
||||
phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
|
||||
static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
|
||||
|
||||
/// Max depth of nested HTML tags allowed by the Matrix specification.
|
||||
const MAX_DEPTH_STRICT: u32 = 100;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{HtmlSanitizer, HtmlSanitizerMode, RemoveReplyFallback};

    /// Input that only uses allowed tags, attributes and schemes must come out unchanged.
    ///
    /// The `mx-reply` inside `<code>` is escaped text, not an element, so it must survive even
    /// though reply fallbacks are removed.
    #[test]
    fn valid_input() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes);
        let sanitized = sanitizer.clean(
            "\
            <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
            <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
            <img src=\"mxc://notareal.hs/abcdef\">\
            <code class=\"language-html\">&lt;mx-reply&gt;This is a fake reply&lt;/mx-reply&gt;</code>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
            <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
            <img src=\"mxc://notareal.hs/abcdef\">\
            <code class=\"language-html\">&lt;mx-reply&gt;This is a fake reply&lt;/mx-reply&gt;</code>\
            "
        );
    }

    /// Unknown tags are dropped but their content is kept; the reply fallback is preserved when
    /// its removal is not requested.
    #[test]
    fn tags_remove() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <mx-reply>\
                <blockquote>\
                    <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
                    <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
                    <br>\
                    Previous message\
                </blockquote>\
            </mx-reply>\
            <removed>This has no tag</removed>\
            <p>But this is inside a tag</p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <mx-reply>\
                <blockquote>\
                    <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
                    <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
                    <br>\
                    Previous message\
                </blockquote>\
            </mx-reply>\
            This has no tag\
            <p>But this is inside a tag</p>\
            "
        );
    }

    /// Same input as `tags_remove`, but the whole reply fallback must disappear as well.
    #[test]
    fn tags_remove_without_reply() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes);
        let sanitized = sanitizer.clean(
            "\
            <mx-reply>\
                <blockquote>\
                    <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
                    <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
                    <br>\
                    Previous message\
                </blockquote>\
            </mx-reply>\
            <removed>This has no tag</removed>\
            <p>But this is inside a tag</p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            This has no tag\
            <p>But this is inside a tag</p>\
            "
        );
    }

    /// The reply-fallback-only sanitizer removes `mx-reply` and leaves every other tag alone,
    /// including unknown ones.
    #[test]
    fn tags_remove_only_reply_fallback() {
        let sanitizer = HtmlSanitizer::reply_fallback_remover();
        let sanitized = sanitizer.clean(
            "\
            <mx-reply>\
                <blockquote>\
                    <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
                    <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
                    <br>\
                    Previous message\
                </blockquote>\
            </mx-reply>\
            <keep-me>This keeps its tag</keep-me>\
            <p>But this is inside a tag</p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <keep-me>This keeps its tag</keep-me>\
            <p>But this is inside a tag</p>\
            "
        );
    }

    /// Attributes that are not allowed on a tag are stripped while the tag itself stays.
    #[test]
    fn attrs_remove() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <h1 id=\"anchor1\">Title for important stuff</h1>\
            <p class=\"important\">Look at <font color=\"blue\" size=20>me!</font></p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <h1>Title for important stuff</h1>\
            <p>Look at <font color=\"blue\">me!</font></p>\
            "
        );
    }

    /// An `img` whose `src` uses a scheme other than `mxc` is removed.
    #[test]
    fn img_remove_scheme() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <p>Look at that picture:</p>\
            <img src=\"https://notareal.hs/abcdef\">\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <p>Look at that picture:</p>\
            "
        );
    }

    /// A link with a disallowed scheme loses its `a` tag but keeps its text.
    #[test]
    fn link_remove_scheme() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <p>Go see <a href=\"file://local/file.html\">my local website</a></p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <p>Go see my local website</p>\
            "
        );
    }

    /// The `matrix` scheme is rejected in strict mode and accepted in compat mode.
    #[test]
    fn link_compat_scheme() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <p>Join <a href=\"matrix:r/myroom:notareal.hs\">my room</a></p>\
            <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
            ",
        );
        assert_eq!(
            sanitized.to_string(),
            "\
            <p>Join my room</p>\
            <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
            "
        );

        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Compat, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <p>Join <a href=\"matrix:r/myroom:notareal.hs\">my room</a></p>\
            <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
            ",
        );
        assert_eq!(
            sanitized.to_string(),
            "\
            <p>Join <a href=\"matrix:r/myroom:notareal.hs\">my room</a></p>\
            <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
            "
        );
    }

    /// Only `language-*` classes survive on `code`; a `class` attribute left empty is removed.
    ///
    /// The lines inside `<pre><code>` deliberately have no `\` continuation: their newlines and
    /// indentation are part of the content and must round-trip unchanged. The angle brackets of
    /// the Rust snippet are written as entities so they are parsed as text, not as a tag.
    #[test]
    fn class_remove() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let sanitized = sanitizer.clean(
            "\
            <pre><code class=\"language-rust custom-class\">
                type StringList = Vec&lt;String&gt;;
            </code></pre>\
            <p>What do you think of the name <code class=\"fake-language-rust\">StringList</code>?</p>\
            ",
        );

        assert_eq!(
            sanitized.to_string(),
            "\
            <pre><code class=\"language-rust\">
                type StringList = Vec&lt;String&gt;;
            </code></pre>\
            <p>What do you think of the name <code>StringList</code>?</p>\
            "
        );
    }

    /// Content nested deeper than the strict maximum depth is dropped; content at the limit is
    /// kept.
    #[test]
    fn depth_remove() {
        let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
        let deeply_nested_html: String = std::iter::repeat("<div>")
            .take(100)
            .chain(Some(
                "<span>I am in too deep!</span>\
                I should be fine.",
            ))
            .chain(std::iter::repeat("</div>").take(100))
            .collect();

        let sanitized = sanitizer.clean(&deeply_nested_html).to_string();

        assert!(sanitized.contains("I should be fine."));
        assert!(!sanitized.contains("I am in too deep!"));
    }
}
Reference in New Issue
Block a user