html: Add support for deprecated HTML tags
According to Matrix 1.10
This commit is contained in:
parent
5a0faa81b1
commit
da1df75619
@ -1,5 +1,9 @@
|
|||||||
# [unreleased]
|
# [unreleased]
|
||||||
|
|
||||||
|
Improvements:
|
||||||
|
|
||||||
|
- Add support for deprecated HTML tags, according to Matrix 1.10
|
||||||
|
|
||||||
# 0.1.0
|
# 0.1.0
|
||||||
|
|
||||||
Initial release
|
Initial release
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use html5ever::{tendril::StrTendril, Attribute};
|
use html5ever::{tendril::StrTendril, Attribute, LocalName};
|
||||||
use phf::{phf_map, phf_set, Map, Set};
|
use phf::{phf_map, phf_set, Map, Set};
|
||||||
use wildmatch::WildMatch;
|
use wildmatch::WildMatch;
|
||||||
|
|
||||||
@ -12,11 +12,22 @@ pub struct SanitizerConfig {
|
|||||||
/// If this is `None`, all tags are allowed.
|
/// If this is `None`, all tags are allowed.
|
||||||
allowed_tags: Option<&'static Set<&'static str>>,
|
allowed_tags: Option<&'static Set<&'static str>>,
|
||||||
|
|
||||||
|
/// The allowed deprecated HTML tags.
|
||||||
|
///
|
||||||
|
/// This is a map of allowed deprecated tags to their replacement tag.
|
||||||
|
deprecated_tags: Option<&'static Map<&'static str, &'static str>>,
|
||||||
|
|
||||||
/// The allowed attributes per tag.
|
/// The allowed attributes per tag.
|
||||||
///
|
///
|
||||||
/// If this is `None`, all attributes are allowed.
|
/// If this is `None`, all attributes are allowed.
|
||||||
allowed_attrs: Option<&'static Map<&'static str, &'static Set<&'static str>>>,
|
allowed_attrs: Option<&'static Map<&'static str, &'static Set<&'static str>>>,
|
||||||
|
|
||||||
|
/// The allowed deprecated attributes per tag.
|
||||||
|
///
|
||||||
|
/// This is a map of tags to a map of allowed deprecated attributes to their replacement
|
||||||
|
/// attribute.
|
||||||
|
deprecated_attrs: Option<&'static Map<&'static str, &'static Map<&'static str, &'static str>>>,
|
||||||
|
|
||||||
/// The allowed URI schemes per tag.
|
/// The allowed URI schemes per tag.
|
||||||
///
|
///
|
||||||
/// If this is `None`, all schemes are allowed.
|
/// If this is `None`, all schemes are allowed.
|
||||||
@ -43,13 +54,17 @@ impl SanitizerConfig {
|
|||||||
/// Constructs a `SanitizerConfig` that will filter tags or attributes not [listed in the
|
/// Constructs a `SanitizerConfig` that will filter tags or attributes not [listed in the
|
||||||
/// Matrix specification].
|
/// Matrix specification].
|
||||||
///
|
///
|
||||||
|
/// Deprecated tags will be replaced with their non-deprecated equivalent.
|
||||||
|
///
|
||||||
/// It will not remove the reply fallback by default.
|
/// It will not remove the reply fallback by default.
|
||||||
///
|
///
|
||||||
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
pub fn strict() -> Self {
|
pub fn strict() -> Self {
|
||||||
Self {
|
Self {
|
||||||
allowed_tags: Some(&ALLOWED_TAGS_WITHOUT_REPLY_STRICT),
|
allowed_tags: Some(&ALLOWED_TAGS_WITHOUT_REPLY_STRICT),
|
||||||
|
deprecated_tags: Some(&DEPRECATED_TAGS),
|
||||||
allowed_attrs: Some(&ALLOWED_ATTRIBUTES_STRICT),
|
allowed_attrs: Some(&ALLOWED_ATTRIBUTES_STRICT),
|
||||||
|
deprecated_attrs: Some(&DEPRECATED_ATTRS),
|
||||||
allowed_schemes: Some(&ALLOWED_SCHEMES_STRICT),
|
allowed_schemes: Some(&ALLOWED_SCHEMES_STRICT),
|
||||||
allowed_classes: Some(&ALLOWED_CLASSES_STRICT),
|
allowed_classes: Some(&ALLOWED_CLASSES_STRICT),
|
||||||
max_depth: Some(MAX_DEPTH_STRICT),
|
max_depth: Some(MAX_DEPTH_STRICT),
|
||||||
@ -62,6 +77,8 @@ impl SanitizerConfig {
|
|||||||
///
|
///
|
||||||
/// - The `matrix` scheme is allowed in links.
|
/// - The `matrix` scheme is allowed in links.
|
||||||
///
|
///
|
||||||
|
/// Deprecated tags will be replaced with their non-deprecated equivalent.
|
||||||
|
///
|
||||||
/// It will not remove the reply fallback by default.
|
/// It will not remove the reply fallback by default.
|
||||||
///
|
///
|
||||||
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
/// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
|
||||||
@ -89,6 +106,8 @@ impl SanitizerConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn clean_node(&self, html: &mut Html, node_id: usize, depth: u32) {
|
fn clean_node(&self, html: &mut Html, node_id: usize, depth: u32) {
|
||||||
|
self.apply_deprecations(html, node_id);
|
||||||
|
|
||||||
let action = self.node_action(html, node_id, depth);
|
let action = self.node_action(html, node_id, depth);
|
||||||
|
|
||||||
if action != NodeAction::Remove {
|
if action != NodeAction::Remove {
|
||||||
@ -111,6 +130,42 @@ impl SanitizerConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn apply_deprecations(&self, html: &mut Html, node_id: usize) {
|
||||||
|
if let NodeData::Element(ElementData { name, attrs, .. }) = &mut html.nodes[node_id].data {
|
||||||
|
let tag: &str = &name.local;
|
||||||
|
|
||||||
|
if let Some(deprecated_attrs) =
|
||||||
|
self.deprecated_attrs.and_then(|deprecated_attrs| deprecated_attrs.get(tag))
|
||||||
|
{
|
||||||
|
*attrs = attrs
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|mut attr| {
|
||||||
|
let attr_name: &str = &attr.name.local;
|
||||||
|
|
||||||
|
let attr_replacement =
|
||||||
|
deprecated_attrs.get(attr_name).map(|s| LocalName::from(*s));
|
||||||
|
|
||||||
|
if let Some(attr_replacement) = attr_replacement {
|
||||||
|
attr.name.local = attr_replacement;
|
||||||
|
}
|
||||||
|
|
||||||
|
attr
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
let tag_replacement = self
|
||||||
|
.deprecated_tags
|
||||||
|
.and_then(|deprecated_tags| deprecated_tags.get(tag))
|
||||||
|
.map(|s| LocalName::from(*s));
|
||||||
|
|
||||||
|
if let Some(tag_replacement) = tag_replacement {
|
||||||
|
name.local = tag_replacement;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn node_action(&self, html: &Html, node_id: usize, depth: u32) -> NodeAction {
|
fn node_action(&self, html: &Html, node_id: usize, depth: u32) -> NodeAction {
|
||||||
match &html.nodes[node_id].data {
|
match &html.nodes[node_id].data {
|
||||||
NodeData::Element(ElementData { name, attrs, .. }) => {
|
NodeData::Element(ElementData { name, attrs, .. }) => {
|
||||||
@ -247,8 +302,8 @@ enum AttributeAction {
|
|||||||
|
|
||||||
/// List of HTML tags allowed in the Matrix specification, without the rich reply fallback tag.
|
/// List of HTML tags allowed in the Matrix specification, without the rich reply fallback tag.
|
||||||
static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
|
static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
|
||||||
"font", "del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
|
"del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
|
||||||
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "strike",
|
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "s",
|
||||||
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
|
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
|
||||||
"caption", "pre", "span", "img", "details", "summary",
|
"caption", "pre", "span", "img", "details", "summary",
|
||||||
};
|
};
|
||||||
@ -256,17 +311,20 @@ static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
|
|||||||
/// The HTML tag name for a rich reply fallback.
|
/// The HTML tag name for a rich reply fallback.
|
||||||
const RICH_REPLY_TAG: &str = "mx-reply";
|
const RICH_REPLY_TAG: &str = "mx-reply";
|
||||||
|
|
||||||
|
/// HTML tags that were allowed in the Matrix specification, with their replacement.
|
||||||
|
static DEPRECATED_TAGS: Map<&str, &str> = phf_map! {
|
||||||
|
"font" => "span",
|
||||||
|
"strike" => "s",
|
||||||
|
};
|
||||||
|
|
||||||
/// Allowed attributes per HTML tag according to the Matrix specification.
|
/// Allowed attributes per HTML tag according to the Matrix specification.
|
||||||
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
"font" => &ALLOWED_ATTRIBUTES_FONT_STRICT,
|
|
||||||
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
|
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
|
||||||
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
|
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
|
||||||
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
|
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
|
||||||
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
|
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
|
||||||
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
|
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
|
||||||
};
|
};
|
||||||
static ALLOWED_ATTRIBUTES_FONT_STRICT: Set<&str> =
|
|
||||||
phf_set! { "data-mx-bg-color", "data-mx-color", "color" };
|
|
||||||
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
|
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
|
||||||
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
|
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
|
||||||
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
|
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
|
||||||
@ -275,6 +333,13 @@ static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
|
|||||||
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
|
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
|
||||||
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
|
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
|
||||||
|
|
||||||
|
/// Attributes that were allowed on HTML tags according to the Matrix specification, with their
|
||||||
|
/// replacement.
|
||||||
|
static DEPRECATED_ATTRS: Map<&str, &Map<&str, &str>> = phf_map! {
|
||||||
|
"font" => &DEPRECATED_ATTRIBUTES_FONT,
|
||||||
|
};
|
||||||
|
static DEPRECATED_ATTRIBUTES_FONT: Map<&str, &str> = phf_map! { "color" => "data-mx-color" };
|
||||||
|
|
||||||
/// Allowed schemes of URIs per HTML tag and attribute tuple according to the Matrix specification.
|
/// Allowed schemes of URIs per HTML tag and attribute tuple according to the Matrix specification.
|
||||||
static ALLOWED_SCHEMES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
static ALLOWED_SCHEMES_STRICT: Map<&str, &Set<&str>> = phf_map! {
|
||||||
"a:href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
|
"a:href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
|
||||||
|
@ -122,7 +122,7 @@ fn attrs_remove() {
|
|||||||
let mut html = Html::parse(
|
let mut html = Html::parse(
|
||||||
"\
|
"\
|
||||||
<h1 id=\"anchor1\">Title for important stuff</h1>\
|
<h1 id=\"anchor1\">Title for important stuff</h1>\
|
||||||
<p class=\"important\">Look at <font color=\"blue\" size=20>me!</font></p>\
|
<p class=\"important\">Look at <span data-mx-color=\"#0000ff\" size=20>me!</span></p>\
|
||||||
",
|
",
|
||||||
);
|
);
|
||||||
html.sanitize_with(config);
|
html.sanitize_with(config);
|
||||||
@ -131,7 +131,7 @@ fn attrs_remove() {
|
|||||||
html.to_string(),
|
html.to_string(),
|
||||||
"\
|
"\
|
||||||
<h1>Title for important stuff</h1>\
|
<h1>Title for important stuff</h1>\
|
||||||
<p>Look at <font color=\"blue\">me!</font></p>\
|
<p>Look at <span data-mx-color=\"#0000ff\">me!</span></p>\
|
||||||
"
|
"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -246,3 +246,21 @@ fn depth_remove() {
|
|||||||
assert!(res.contains("I should be fine."));
|
assert!(res.contains("I should be fine."));
|
||||||
assert!(!res.contains("I am in too deep!"));
|
assert!(!res.contains("I am in too deep!"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sanitizing with the strict config must rewrite deprecated tags (`strike`, `font`) and the
// deprecated `color` attribute of `font` to their replacements.
#[test]
fn replace_deprecated() {
    let config = SanitizerConfig::strict();
    // The `font` element is closed with a matching `</font>` so the fixture is well-formed and
    // the test does not depend on the parser recovering from a stray end tag.
    let mut html = Html::parse(
        "\
        <p>Look at <strike>you </strike><font data-mx-bg-color=\"#ff0000\" color=\"#0000ff\">me!</font></p>\
        ",
    );
    html.sanitize_with(config);

    assert_eq!(
        html.to_string(),
        "\
        <p>Look at <s>you </s><span data-mx-bg-color=\"#ff0000\" data-mx-color=\"#0000ff\">me!</span></p>\
        "
    );
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user