events: Add methods to sanitize messages

Can also remove rich reply fallbacks

Behind the `unstable-sanitize` feature.

Co-authored-by: Jonas Platte <jplatte@matrix.org>
This commit is contained in:
Kévin Commaille 2022-06-24 19:13:46 +02:00 committed by GitHub
parent 506a7bdf2e
commit cac7e09429
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1364 additions and 28 deletions

View File

@ -39,6 +39,8 @@ Improvements:
* Deserialize stringified integers for power levels without the `compat` feature
* Add `JoinRule::KnockRestricted` (MSC3787)
* Add `MatrixVersionId::V10` (MSC3604)
* Add methods to sanitize messages according to the spec behind the `unstable-sanitize` feature
* Can also remove rich reply fallbacks
# 0.9.2

View File

@ -33,6 +33,7 @@ rand = ["rand_crate", "uuid"]
unstable-exhaustive-types = []
unstable-pdu = []
unstable-pre-spec = []
unstable-sanitize = ["html5ever", "phf"]
unstable-msc1767 = []
unstable-msc2448 = []
unstable-msc2676 = []
@ -53,12 +54,14 @@ base64 = "0.13.0"
bytes = "1.0.1"
form_urlencoded = "1.0.0"
getrandom = { version = "0.2.6", optional = true }
html5ever = { version = "0.25.2", optional = true }
http = { version = "0.2.2", optional = true }
indexmap = { version = "1.6.2", features = ["serde-1"] }
itoa = "1.0.1"
js_int = { version = "0.2.0", features = ["serde"] }
js_option = "0.1.0"
percent-encoding = "2.1.0"
phf = { version = "0.10.1", features = ["macros"], optional = true }
pulldown-cmark = { version = "0.9.1", default-features = false, optional = true }
rand_crate = { package = "rand", version = "0.8.3", optional = true }
regex = { version = "1.5.6", default-features = false, features = ["std", "perf"] }

View File

@ -0,0 +1,11 @@
<!-- Keep this comment so the content is always included as a new paragraph -->
This constructor requires an [`OriginalRoomMessageEvent`] since it creates a permalink to
the previous message, for which the room ID is required. If you want to reply to an
[`OriginalSyncRoomMessageEvent`], you have to convert it first by calling
[`.into_full_event()`][crate::events::OriginalSyncMessageLikeEvent::into_full_event].
It is recommended to enable the `unstable-sanitize` feature when using this method as this will
clean up nested [rich reply fallbacks] in chains of replies. This uses [`sanitize_html()`]
internally, with [`RemoveReplyFallback::Yes`].
[rich reply fallbacks]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies

View File

@ -23,6 +23,7 @@ mod location;
mod notice;
mod relation_serde;
mod reply;
pub mod sanitize;
mod server_notice;
mod text;
mod video;
@ -34,6 +35,10 @@ pub use image::ImageMessageEventContent;
pub use key_verification_request::KeyVerificationRequestEventContent;
pub use location::{LocationInfo, LocationMessageEventContent};
pub use notice::NoticeMessageEventContent;
#[cfg(feature = "unstable-sanitize")]
use sanitize::{
remove_plain_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback,
};
pub use server_notice::{LimitType, ServerNoticeMessageEventContent, ServerNoticeType};
pub use text::TextMessageEventContent;
pub use video::{VideoInfo, VideoMessageEventContent};
@ -99,11 +104,7 @@ impl RoomMessageEventContent {
}
/// Creates a plain text reply to a message.
///
/// This constructor requires an [`OriginalRoomMessageEvent`] since it creates a permalink to
/// the previous message, for which the room ID is required. If you want to reply to an
/// [`OriginalSyncRoomMessageEvent`], you have to convert it first by calling
/// [`.into_full_event()`][crate::events::OriginalSyncMessageLikeEvent::into_full_event].
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
pub fn text_reply_plain(
reply: impl fmt::Display,
original_message: &OriginalRoomMessageEvent,
@ -121,11 +122,7 @@ impl RoomMessageEventContent {
}
/// Creates a html text reply to a message.
///
/// This constructor requires an [`OriginalRoomMessageEvent`] since it creates a permalink to
/// the previous message, for which the room ID is required. If you want to reply to an
/// [`OriginalSyncRoomMessageEvent`], you have to convert it first by calling
/// [`.into_full_event()`][crate::events::OriginalSyncMessageLikeEvent::into_full_event].
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
pub fn text_reply_html(
reply: impl fmt::Display,
html_reply: impl fmt::Display,
@ -143,11 +140,7 @@ impl RoomMessageEventContent {
}
/// Creates a plain text notice reply to a message.
///
/// This constructor requires an [`OriginalRoomMessageEvent`] since it creates a permalink to
/// the previous message, for which the room ID is required. If you want to reply to an
/// [`OriginalSyncRoomMessageEvent`], you have to convert it first by calling
/// [`.into_full_event()`][crate::events::OriginalSyncMessageLikeEvent::into_full_event].
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
pub fn notice_reply_plain(
reply: impl fmt::Display,
original_message: &OriginalRoomMessageEvent,
@ -165,11 +158,7 @@ impl RoomMessageEventContent {
}
/// Creates a html text notice reply to a message.
///
/// This constructor requires an [`OriginalRoomMessageEvent`] since it creates a permalink to
/// the previous message, for which the room ID is required. If you want to reply to an
/// [`OriginalSyncRoomMessageEvent`], you have to convert it first by calling
/// [`.into_full_event()`][crate::events::OriginalSyncMessageLikeEvent::into_full_event].
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
pub fn notice_reply_html(
reply: impl fmt::Display,
html_reply: impl fmt::Display,
@ -190,6 +179,7 @@ impl RoomMessageEventContent {
///
/// If `message` is a text, an emote or a notice message, it is modified to include the rich
/// reply fallback.
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
#[cfg(feature = "unstable-msc3440")]
pub fn reply(
message: MessageType,
@ -240,6 +230,7 @@ impl RoomMessageEventContent {
///
/// If `message` is a text, an emote or a notice message, and this is a reply in the thread, it
/// is modified to include the rich reply fallback.
#[doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/doc/rich_reply.md"))]
#[cfg(feature = "unstable-msc3440")]
pub fn for_thread(
message: MessageType,
@ -301,6 +292,39 @@ impl RoomMessageEventContent {
pub fn body(&self) -> &str {
self.msgtype.body()
}
/// Sanitizes the content of this message in place.
///
/// When the message carries a formatted HTML body, the [tags and attributes] that the Matrix
/// specification does not list are removed from it.
///
/// Depending on `remove_reply_fallback`, the [rich reply fallback] is also dropped from the
/// HTML body and, if this message is a reply, from the plain text body.
///
/// Only text, notice and emote messages are affected; any other message type is left as is.
///
/// [tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
#[cfg(feature = "unstable-sanitize")]
pub fn sanitize(
    &mut self,
    mode: HtmlSanitizerMode,
    remove_reply_fallback: RemoveReplyFallback,
) {
    // Bail out early for message types that have no textual body to sanitize.
    let (body, formatted) = match &mut self.msgtype {
        MessageType::Emote(EmoteMessageEventContent { body, formatted, .. })
        | MessageType::Notice(NoticeMessageEventContent { body, formatted, .. })
        | MessageType::Text(TextMessageEventContent { body, formatted, .. }) => (body, formatted),
        _ => return,
    };

    if let Some(formatted) = formatted {
        formatted.sanitize_html(mode, remove_reply_fallback);
    }

    // The plain text fallback is only stripped when the message actually is a reply.
    let is_reply = matches!(self.relates_to, Some(Relation::Reply { .. }));
    if is_reply && remove_reply_fallback == RemoveReplyFallback::Yes {
        *body = remove_plain_reply_fallback(body).to_owned();
    }
}
}
/// Whether or not to forward a [`Relation::Thread`] when sending a reply.
@ -634,6 +658,27 @@ impl FormattedBody {
(html_body != format!("<p>{}</p>\n", body)).then(|| Self::html(html_body))
}
/// Sanitize this `FormattedBody` if its format is `MessageFormat::Html`.
///
/// This removes any [tags and attributes] that are not listed in the Matrix specification.
///
/// It can also optionally remove the [rich reply fallback].
///
/// The body is replaced in place with the sanitized HTML; nothing is returned. If the format is
/// not `MessageFormat::Html`, this method does nothing.
///
/// [tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
#[cfg(feature = "unstable-sanitize")]
pub fn sanitize_html(
&mut self,
mode: HtmlSanitizerMode,
remove_reply_fallback: RemoveReplyFallback,
) {
if self.format == MessageFormat::Html {
self.body = sanitize_html(&self.body, mode, remove_reply_fallback);
}
}
}
/// The payload for a custom message event.

View File

@ -1,10 +1,13 @@
use std::fmt;
use super::{FormattedBody, MessageType, OriginalRoomMessageEvent};
use super::{
sanitize::remove_plain_reply_fallback, FormattedBody, MessageType, OriginalRoomMessageEvent,
Relation,
};
#[cfg(feature = "unstable-sanitize")]
use super::{sanitize_html, HtmlSanitizerMode, RemoveReplyFallback};
pub fn get_message_quote_fallbacks(
original_message: &OriginalRoomMessageEvent,
) -> (String, String) {
fn get_message_quote_fallbacks(original_message: &OriginalRoomMessageEvent) -> (String, String) {
match &original_message.content.msgtype {
MessageType::Audio(_) => get_quotes("sent an audio file.", None, original_message, false),
MessageType::Emote(content) => {
@ -36,8 +39,13 @@ fn get_quotes(
original_message: &OriginalRoomMessageEvent,
is_emote: bool,
) -> (String, String) {
let OriginalRoomMessageEvent { room_id, event_id, sender, .. } = original_message;
let OriginalRoomMessageEvent { room_id, event_id, sender, content, .. } = original_message;
let is_reply = matches!(content.relates_to, Some(Relation::Reply { .. }));
let emote_sign = is_emote.then(|| "* ").unwrap_or_default();
let body = is_reply.then(|| remove_plain_reply_fallback(body)).unwrap_or(body);
#[cfg(feature = "unstable-sanitize")]
let html_body = formatted_or_plain_body(formatted, body, is_reply);
#[cfg(not(feature = "unstable-sanitize"))]
let html_body = formatted_or_plain_body(formatted, body);
(
@ -55,18 +63,31 @@ fn get_quotes(
)
}
fn formatted_or_plain_body(formatted: Option<&FormattedBody>, body: &str) -> String {
fn formatted_or_plain_body(
formatted: Option<&FormattedBody>,
body: &str,
#[cfg(feature = "unstable-sanitize")] is_reply: bool,
) -> String {
if let Some(formatted_body) = formatted {
#[cfg(feature = "unstable-sanitize")]
if is_reply {
sanitize_html(&formatted_body.body, HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes)
} else {
formatted_body.body.clone()
}
#[cfg(not(feature = "unstable-sanitize"))]
formatted_body.body.clone()
} else {
let mut escaped_body = String::with_capacity(body.len());
for c in body.chars() {
// Escape reserved HTML entities and new lines.
// <https://developer.mozilla.org/en-US/docs/Glossary/Entity#reserved_characters>
let s = match c {
'&' => Some("&amp;"),
'<' => Some("&lt;"),
'>' => Some("&gt;"),
'"' => Some("&quot;"),
'\'' => Some("&apos;"),
'\n' => Some("<br>"),
_ => None,
};
@ -83,6 +104,13 @@ fn formatted_or_plain_body(formatted: Option<&FormattedBody>, body: &str) -> Str
/// Get the plain and formatted body for a rich reply.
///
/// Returns a `(plain, html)` tuple.
///
/// With the `unstable-sanitize` feature, [HTML tags and attributes] that are not allowed in the
/// Matrix spec and previous [rich reply fallbacks] are removed from the previous message in the
/// new rich reply fallback.
///
/// [HTML tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
/// [rich reply fallbacks]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
pub fn plain_and_formatted_reply_body(
body: impl fmt::Display,
formatted: Option<impl fmt::Display>,

View File

@ -0,0 +1,209 @@
//! Convenience methods and types to sanitize text messages.
#[cfg(feature = "unstable-sanitize")]
mod html_fragment;
#[cfg(feature = "unstable-sanitize")]
mod html_sanitizer;
#[cfg(feature = "unstable-sanitize")]
use html_sanitizer::HtmlSanitizer;
/// Cleans up the given HTML string according to the Matrix specification.
///
/// The [tags and attributes] that are not listed in the specification are removed. Depending on
/// `remove_reply_fallback`, the [rich reply fallback] is removed as well.
///
/// [tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
#[cfg(feature = "unstable-sanitize")]
pub fn sanitize_html(
    s: &str,
    mode: HtmlSanitizerMode,
    remove_reply_fallback: RemoveReplyFallback,
) -> String {
    HtmlSanitizer::new(mode, remove_reply_fallback).clean(s)
}
/// What HTML [tags and attributes] should be kept by the sanitizer.
///
/// The mode is passed to [`sanitize_html()`], which forwards it to the sanitizer.
///
/// [tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
#[cfg(feature = "unstable-sanitize")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
// NOTE(review): presumably exhaustive on purpose so callers can match on both modes — confirm.
#[allow(clippy::exhaustive_enums)]
pub enum HtmlSanitizerMode {
/// Keep only the tags and attributes listed in the Matrix specification.
Strict,
/// Like `Strict` mode, with additional tags and attributes that are not yet included in
/// the spec, but are reasonable to keep.
Compat,
}
/// Whether to remove the [rich reply fallback] while sanitizing.
///
/// This is a two-state flag spelled out as an enum so that call sites read
/// `RemoveReplyFallback::Yes` / `RemoveReplyFallback::No` instead of a bare `bool`.
///
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
#[cfg(feature = "unstable-sanitize")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
// NOTE(review): presumably exhaustive on purpose, a yes/no flag has no further states — confirm.
#[allow(clippy::exhaustive_enums)]
pub enum RemoveReplyFallback {
/// Remove the rich reply fallback.
Yes,
/// Don't remove the rich reply fallback.
No,
}
/// Strips the [rich reply fallback] from the given HTML string, keeping every other tag.
///
/// The input is parsed and serialized again, so malformed HTML and comments do not survive in
/// the output.
///
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
#[cfg(feature = "unstable-sanitize")]
pub fn remove_html_reply_fallback(s: &str) -> String {
    HtmlSanitizer::reply_fallback_remover().clean(s)
}
/// Strips the [rich reply fallback] from the start of the given plain text string.
///
/// Every leading line that begins with `"> "` is dropped; the returned slice starts at the first
/// line that does not. If all lines are quoted, an empty string is returned.
///
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
pub fn remove_plain_reply_fallback(mut s: &str) -> &str {
    loop {
        if !s.starts_with("> ") {
            break s;
        }

        match s.split_once('\n') {
            // Skip the quoted line and look at what follows.
            Some((_quoted, remainder)) => s = remainder,
            // The last line was quoted too, nothing is left.
            None => break "",
        }
    }
}
#[cfg(test)]
mod tests {
use super::remove_plain_reply_fallback;
#[cfg(feature = "unstable-sanitize")]
use super::{
remove_html_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback,
};
// Strict sanitizing that keeps the reply fallback: the unknown `<removed>` tag is dropped but
// its text stays, and the `<mx-reply>` block is preserved.
#[test]
#[cfg(feature = "unstable-sanitize")]
fn sanitize() {
let sanitized = sanitize_html(
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
<a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
<br>\
Previous message\
</blockquote>\
</mx-reply>\
<removed>This has no tag</removed>\
<p>But this is inside a tag</p>\
",
HtmlSanitizerMode::Strict,
RemoveReplyFallback::No,
);
assert_eq!(
sanitized,
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
<a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
<br>\
Previous message\
</blockquote>\
</mx-reply>\
This has no tag\
<p>But this is inside a tag</p>\
"
);
}
// Strict sanitizing that also removes the reply fallback: the whole `<mx-reply>` block is gone.
#[test]
#[cfg(feature = "unstable-sanitize")]
fn sanitize_without_reply() {
let sanitized = sanitize_html(
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
<a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
<br>\
Previous message\
</blockquote>\
</mx-reply>\
<removed>This has no tag</removed>\
<p>But this is inside a tag</p>\
",
HtmlSanitizerMode::Strict,
RemoveReplyFallback::Yes,
);
assert_eq!(
sanitized,
"\
This has no tag\
<p>But this is inside a tag</p>\
"
);
}
// Removing only the reply fallback: tags outside the Matrix allow-list (`<keep-me>`) are kept.
#[test]
#[cfg(feature = "unstable-sanitize")]
fn remove_html_reply() {
let without_reply = remove_html_reply_fallback(
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
<a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
<br>\
Previous message\
</blockquote>\
</mx-reply>\
<keep-me>This keeps its tag</keep-me>\
<p>But this is inside a tag</p>\
",
);
assert_eq!(
without_reply,
"\
<keep-me>This keeps its tag</keep-me>\
<p>But this is inside a tag</p>\
"
);
}
// Plain text fallback removal: only the leading run of `"> "` lines is stripped; quotes that
// appear after a non-quoted line (including an empty one) are left alone.
#[test]
fn remove_plain_reply() {
assert_eq!(
remove_plain_reply_fallback("No reply here\nJust a simple message"),
"No reply here\nJust a simple message"
);
assert_eq!(
remove_plain_reply_fallback(
"> <@user:notareal.hs> Replied to on\n\
> two lines\n\
This is my reply"
),
"This is my reply"
);
assert_eq!(remove_plain_reply_fallback("\n> Not on first line"), "\n> Not on first line");
assert_eq!(
remove_plain_reply_fallback("> <@user:notareal.hs> Previous message\n\n> New quote"),
"\n> New quote"
);
}
}

View File

@ -0,0 +1,396 @@
use std::{collections::BTreeSet, fmt, io};
use html5ever::{
local_name, namespace_url, ns, parse_fragment,
serialize::{serialize, Serialize, SerializeOpts, Serializer, TraversalScope},
tendril::{StrTendril, TendrilSink},
tree_builder::{NodeOrText, TreeSink},
Attribute, ParseOpts, QualName,
};
use tracing::debug;
/// An HTML fragment.
///
/// To get the serialized HTML, use its `Display` implementation.
#[derive(Debug)]
pub struct Fragment {
/// All the nodes of this fragment. Nodes refer to each other by their index in this list;
/// index `0` holds the `NodeData::Document` root created by `Fragment::default()`.
pub nodes: Vec<Node>,
}
impl Fragment {
    /// Parses the given HTML into a new `Fragment`.
    ///
    /// The HTML is parsed as a fragment whose context element is a `<div>`.
    pub fn parse_html(html: &str) -> Self {
        let context = QualName::new(None, ns!(html), local_name!("div"));
        let mut parser =
            parse_fragment(Self::default(), ParseOpts::default(), context, Vec::new());
        parser.process(html.into());
        parser.finish()
    }

    /// Creates a new, unattached `Node` holding `data` and stores it in this `Fragment`.
    ///
    /// Returns the index of the new node.
    pub fn new_node(&mut self, data: NodeData) -> usize {
        let index = self.nodes.len();
        self.nodes.push(Node::new(data));
        index
    }

    /// Makes the given node the last child of the given parent.
    ///
    /// The node is first detached from wherever it was.
    pub fn append_node(&mut self, parent_id: usize, node_id: usize) {
        self.detach(node_id);

        // The node becomes the parent's last child; remember who held that place before.
        let previous_last = self.nodes[parent_id].last_child.replace(node_id);
        self.nodes[node_id].parent = Some(parent_id);

        match previous_last {
            Some(last_child) => {
                self.nodes[node_id].prev_sibling = Some(last_child);
                self.nodes[last_child].next_sibling = Some(node_id);
            }
            // The parent had no children, so the node is also its first child.
            None => self.nodes[parent_id].first_child = Some(node_id),
        }
    }

    /// Places the given node right before the given sibling, under the sibling's parent.
    ///
    /// The node is first detached from wherever it was.
    pub fn insert_before(&mut self, sibling_id: usize, node_id: usize) {
        self.detach(node_id);

        let parent = self.nodes[sibling_id].parent;
        // The node takes the place of the sibling's previous sibling.
        let previous = self.nodes[sibling_id].prev_sibling.replace(node_id);

        let node = &mut self.nodes[node_id];
        node.parent = parent;
        node.next_sibling = Some(sibling_id);

        match (previous, parent) {
            (Some(previous), _) => {
                self.nodes[node_id].prev_sibling = Some(previous);
                self.nodes[previous].next_sibling = Some(node_id);
            }
            // The sibling was the first child, so the node is the new first child.
            (None, Some(parent)) => self.nodes[parent].first_child = Some(node_id),
            (None, None) => {}
        }
    }

    /// Unlinks the given node from its parent and siblings.
    ///
    /// The node stays in `nodes` and keeps its own children.
    pub fn detach(&mut self, node_id: usize) {
        let node = &mut self.nodes[node_id];
        let parent = node.parent.take();
        let prev_sibling = node.prev_sibling.take();
        let next_sibling = node.next_sibling.take();

        // Fix the backward link of whatever came after the node.
        match (next_sibling, parent) {
            (Some(next), _) => self.nodes[next].prev_sibling = prev_sibling,
            (None, Some(parent)) => self.nodes[parent].last_child = prev_sibling,
            (None, None) => {}
        }

        // Fix the forward link of whatever came before the node.
        match (prev_sibling, parent) {
            (Some(prev), _) => self.nodes[prev].next_sibling = next_sibling,
            (None, Some(parent)) => self.nodes[parent].first_child = next_sibling,
            (None, None) => {}
        }
    }
}
impl Default for Fragment {
    /// Creates a `Fragment` that only contains the document root node, at index `0`.
    fn default() -> Self {
        let document = Node::new(NodeData::Document);
        Self { nodes: vec![document] }
    }
}
/// Lets the html5ever tree builder construct a `Fragment` directly.
///
/// Handles are indices into `Fragment::nodes`.
impl TreeSink for Fragment {
    type Handle = usize;
    type Output = Self;

    /// The `Fragment` itself is the result of the parsing.
    fn finish(self) -> Self::Output {
        self
    }

    /// Parse errors are not fatal, they are only logged at the debug level.
    fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
        debug!("HTML parse error: {msg}");
    }

    /// The document root is always the node at index `0`.
    fn get_document(&mut self) -> Self::Handle {
        0
    }

    /// Returns the expanded name of the given element.
    ///
    /// # Panics
    ///
    /// Panics if the handle does not refer to an element node.
    fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> html5ever::ExpandedName<'a> {
        self.nodes[*target].as_element().expect("not an element").name.expanded()
    }

    /// Creates a detached element node with the given name and attributes.
    fn create_element(
        &mut self,
        name: QualName,
        attrs: Vec<Attribute>,
        _flags: html5ever::tree_builder::ElementFlags,
    ) -> Self::Handle {
        self.new_node(NodeData::Element(ElementData { name, attrs: attrs.into_iter().collect() }))
    }

    /// Comments are not kept: only a placeholder `NodeData::Other` node is created.
    fn create_comment(&mut self, _text: StrTendril) -> Self::Handle {
        self.new_node(NodeData::Other)
    }

    /// Processing instructions are not kept: only a placeholder `NodeData::Other` node is
    /// created.
    fn create_pi(&mut self, _target: StrTendril, _data: StrTendril) -> Self::Handle {
        self.new_node(NodeData::Other)
    }

    /// Appends a node or some text as the last child of `parent`.
    fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
        match child {
            NodeOrText::AppendNode(index) => self.append_node(*parent, index),
            NodeOrText::AppendText(text) => {
                // If the previous sibling is also text, add this text to it.
                if let Some(sibling) =
                    self.nodes[*parent].last_child.and_then(|child| self.nodes[child].as_text_mut())
                {
                    sibling.push_tendril(&text);
                } else {
                    let index = self.new_node(NodeData::Text(text));
                    self.append_node(*parent, index);
                }
            }
        }
    }

    /// Inserts `child` before `element` if `element` has a parent, otherwise appends it to
    /// `prev_element`.
    fn append_based_on_parent_node(
        &mut self,
        element: &Self::Handle,
        prev_element: &Self::Handle,
        child: NodeOrText<Self::Handle>,
    ) {
        if self.nodes[*element].parent.is_some() {
            self.append_before_sibling(element, child)
        } else {
            self.append(prev_element, child)
        }
    }

    /// Doctypes are ignored.
    fn append_doctype_to_document(
        &mut self,
        _name: StrTendril,
        _public_id: StrTendril,
        _system_id: StrTendril,
    ) {
    }

    /// Templates get no separate contents node: the template element itself is returned.
    fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
        *target
    }

    /// Two handles refer to the same node if they are the same index.
    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
        x == y
    }

    /// The quirks mode is ignored.
    fn set_quirks_mode(&mut self, _mode: html5ever::tree_builder::QuirksMode) {}

    /// Inserts a node or some text right before `sibling`.
    fn append_before_sibling(
        &mut self,
        sibling: &Self::Handle,
        new_node: NodeOrText<Self::Handle>,
    ) {
        match new_node {
            NodeOrText::AppendNode(index) => self.insert_before(*sibling, index),
            NodeOrText::AppendText(text) => {
                // If the previous sibling is also text, add this text to it.
                if let Some(prev_text) = self.nodes[*sibling]
                    .prev_sibling
                    .and_then(|prev| self.nodes[prev].as_text_mut())
                {
                    prev_text.push_tendril(&text);
                } else {
                    let index = self.new_node(NodeData::Text(text));
                    self.insert_before(*sibling, index)
                }
            }
        }
    }

    /// Adds each of `attrs` to the given element, unless the element already has an attribute
    /// with the same name.
    ///
    /// # Panics
    ///
    /// Panics if the handle does not refer to an element node.
    fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<html5ever::Attribute>) {
        let target = self.nodes[*target].as_element_mut().expect("not an element");

        for attr in attrs {
            // The set compares whole attributes (name and value), so inserting blindly would
            // add a duplicate of an existing name whenever the values differ. Compare the
            // names explicitly to keep the first value.
            let already_present = target.attrs.iter().any(|existing| existing.name == attr.name);
            if !already_present {
                target.attrs.insert(attr);
            }
        }
    }

    /// Detaches the given node from its parent.
    fn remove_from_parent(&mut self, target: &Self::Handle) {
        self.detach(*target);
    }

    /// Moves all the children of `node` to the end of the children of `new_parent`, in order.
    fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
        let mut next_child = self.nodes[*node].first_child;
        while let Some(child) = next_child {
            // Read the next sibling before moving the child, as moving it resets its links.
            next_child = self.nodes[child].next_sibling;
            self.append_node(*new_parent, child);
        }
    }
}
impl Serialize for Fragment {
    /// Serializes the children of the fragment's root element, in order.
    ///
    /// Nothing is written for `TraversalScope::ChildrenOnly`.
    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
    where
        S: Serializer,
    {
        if let TraversalScope::ChildrenOnly(_) = traversal_scope {
            return Ok(());
        }

        // The first child of the document is the root element; it is not part of the output.
        let root = self.nodes[0].first_child.unwrap();

        let mut cursor = self.nodes[root].first_child;
        while let Some(index) = cursor {
            let node = &self.nodes[index];
            node.serialize(self, serializer)?;
            cursor = node.next_sibling;
        }

        Ok(())
    }
}
impl fmt::Display for Fragment {
    /// Writes the serialized HTML of this fragment.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let opts =
            SerializeOpts { traversal_scope: TraversalScope::IncludeNode, ..Default::default() };

        let mut buffer = Vec::new();
        serialize(&mut buffer, self, opts).unwrap();
        let html = String::from_utf8(buffer).unwrap();

        f.write_str(&html)
    }
}
/// An HTML node.
///
/// The links to other nodes are indices into the `nodes` list of the `Fragment` that owns this
/// node.
#[derive(Debug)]
pub struct Node {
/// The parent of this node, if it is attached to one.
pub parent: Option<usize>,
/// The sibling right before this node, if any.
pub prev_sibling: Option<usize>,
/// The sibling right after this node, if any.
pub next_sibling: Option<usize>,
/// The first child of this node, if it has children.
pub first_child: Option<usize>,
/// The last child of this node, if it has children.
pub last_child: Option<usize>,
/// The content of this node.
pub data: NodeData,
}
impl Node {
    /// Creates a `Node` holding `data`, without any link to other nodes.
    pub fn new(data: NodeData) -> Self {
        Self {
            data,
            parent: None,
            prev_sibling: None,
            next_sibling: None,
            first_child: None,
            last_child: None,
        }
    }

    /// The `ElementData` of this `Node`, or `None` if it is not a `NodeData::Element`.
    pub fn as_element(&self) -> Option<&ElementData> {
        if let NodeData::Element(data) = &self.data {
            Some(data)
        } else {
            None
        }
    }

    /// The mutable `ElementData` of this `Node`, or `None` if it is not a `NodeData::Element`.
    pub fn as_element_mut(&mut self) -> Option<&mut ElementData> {
        if let NodeData::Element(data) = &mut self.data {
            Some(data)
        } else {
            None
        }
    }

    /// The mutable text content of this `Node`, or `None` if it is not a `NodeData::Text`.
    pub fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
        if let NodeData::Text(text) = &mut self.data {
            Some(text)
        } else {
            None
        }
    }
}
impl Node {
    /// Serializes this node and its descendants with the given serializer.
    ///
    /// `fragment` must be the `Fragment` that owns this node, it is used to look up the
    /// children. Elements are written with their start and end tags around their children, the
    /// document only writes its children, text is written as is and other nodes are skipped.
    pub fn serialize<S>(&self, fragment: &Fragment, serializer: &mut S) -> io::Result<()>
    where
        S: Serializer,
    {
        // Leaf nodes are handled right away; containers differ only by their surrounding tags.
        let element = match &self.data {
            NodeData::Element(data) => Some(data),
            NodeData::Document => None,
            NodeData::Text(text) => return serializer.write_text(&**text),
            NodeData::Other => return Ok(()),
        };

        if let Some(data) = element {
            serializer.start_elem(
                data.name.clone(),
                data.attrs.iter().map(|attr| (&attr.name, &*attr.value)),
            )?;
        }

        let mut cursor = self.first_child;
        while let Some(index) = cursor {
            let node = &fragment.nodes[index];
            node.serialize(fragment, serializer)?;
            cursor = node.next_sibling;
        }

        if let Some(data) = element {
            serializer.end_elem(data.name.clone())?;
        }

        Ok(())
    }
}
/// The data of a `Node`.
#[derive(Debug)]
pub enum NodeData {
/// The root node of the `Fragment`.
Document,
/// A text node.
Text(StrTendril),
/// An HTML element (aka a tag).
Element(ElementData),
/// Other types (comment, processing instruction, …).
///
/// Their content is not stored, and they are skipped when serializing.
Other,
}
/// The data of an HTML element.
#[derive(Debug)]
pub struct ElementData {
/// The qualified name of the element.
pub name: QualName,
/// The attributes of the element.
///
/// NOTE(review): the set holds whole `Attribute`s, so uniqueness presumably covers the name
/// and the value together rather than the name alone — confirm against html5ever's
/// `Attribute` ordering.
pub attrs: BTreeSet<Attribute>,
}
#[cfg(test)]
mod tests {
use super::Fragment;
// Parsing well-formed HTML and serializing it again must give back the same string, and an
// empty input must give an empty output.
#[test]
fn sanity() {
let html = "\
<h1>Title</h1>\
<div>\
<p>This is some <em>text</em></p>\
</div>\
";
assert_eq!(Fragment::parse_html(html).to_string(), html);
assert_eq!(Fragment::parse_html("").to_string(), "");
}
}

View File

@ -0,0 +1,542 @@
use html5ever::{tendril::StrTendril, Attribute};
use phf::{phf_map, phf_set, Map, Set};
use wildmatch::WildMatch;
use super::{
html_fragment::{ElementData, Fragment, NodeData},
HtmlSanitizerMode, RemoveReplyFallback,
};
/// A sanitizer to filter [HTML tags and attributes] according to the Matrix specification.
///
/// [HTML tags and attributes]: https://spec.matrix.org/v1.2/client-server-api/#mroommessage-msgtypes
#[derive(Debug, Clone)]
pub struct HtmlSanitizer {
/// The mode of the HTML sanitizer.
///
/// NOTE(review): in the methods of this type the mode only selects which list of allowed URI
/// schemes is used; the tag, attribute and class lists that are consulted are always the
/// strict ones — confirm this is intended.
mode: HtmlSanitizerMode,
/// Whether to filter HTML tags and attributes.
///
/// If this is `true`, tags and attributes that do not match the lists will be removed, but
/// the tags' children will still be present in the output. Elements nested at or beyond
/// `MAX_DEPTH_STRICT` are removed together with their children.
///
/// If this is `false`, all the tags and attributes are allowed.
filter_tags_attributes: bool,
/// Whether to remove replies.
///
/// If this is `true`, the rich reply fallback will be removed.
///
/// If this is `false`, the rich reply tag will be allowed.
remove_replies: bool,
}
impl HtmlSanitizer {
/// Creates an `HtmlSanitizer` that filters the tags and attributes according to the given
/// mode.
///
/// With `RemoveReplyFallback::Yes`, the [rich reply fallback] is removed as well.
///
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
pub fn new(mode: HtmlSanitizerMode, remove_reply_fallback: RemoveReplyFallback) -> Self {
    let remove_replies = matches!(remove_reply_fallback, RemoveReplyFallback::Yes);

    Self { mode, filter_tags_attributes: true, remove_replies }
}
/// Constructs an `HtmlSanitizer` instance that only removes the [rich reply fallback].
///
/// Tag and attribute filtering is disabled for this instance, so all the other tags and
/// attributes are allowed.
///
/// [rich reply fallback]: https://spec.matrix.org/v1.2/client-server-api/#fallbacks-for-rich-replies
pub fn reply_fallback_remover() -> Self {
Self {
// The mode is only read when filtering is enabled, so this value has no effect here.
mode: HtmlSanitizerMode::Strict,
filter_tags_attributes: false,
remove_replies: true,
}
}
/// Parses the given HTML, applies this sanitizer to it and returns the serialized result.
pub fn clean(&self, html: &str) -> String {
    let mut fragment = Fragment::parse_html(html);
    // The first child of the document is the root element that wraps the parsed content.
    let root = fragment.nodes[0].first_child.unwrap();

    let mut cursor = fragment.nodes[root].first_child;
    while let Some(node_id) = cursor {
        // Look up the successor first: cleaning the node can detach it from its siblings.
        cursor = fragment.nodes[node_id].next_sibling;
        self.clean_node(&mut fragment, node_id, 0);
    }

    fragment.to_string()
}
/// Recursively clean the given node and its descendants.
///
/// Depending on the action computed by `node_action()`, the node is kept (and its attributes
/// are filtered if filtering is enabled), replaced by its children, or removed along with its
/// children.
///
/// `depth` is the nesting level of the node, starting at `0` for the top-level nodes.
fn clean_node(&self, fragment: &mut Fragment, node_id: usize, depth: u32) {
let action = self.node_action(fragment, node_id, depth);
// The children of a removed node are dropped with it, so there is no need to visit them.
if action != NodeAction::Remove {
let mut next_child = fragment.nodes[node_id].first_child;
while let Some(child) = next_child {
// Read the next sibling first, the child's links change if it is moved or detached below.
next_child = fragment.nodes[child].next_sibling;
if action == NodeAction::Ignore {
// Move the child out of the ignored node, right before it, to keep the document order.
fragment.insert_before(node_id, child)
}
// The depth is counted from the original nesting, even for children that were moved up.
self.clean_node(fragment, child, depth + 1);
}
}
if matches!(action, NodeAction::Ignore | NodeAction::Remove) {
fragment.detach(node_id);
} else if self.filter_tags_attributes {
if let Some(data) = fragment.nodes[node_id].as_element_mut() {
self.clean_element_attributes(data);
}
}
}
/// Decides what to do with the given node.
///
/// Text nodes are always kept, and nodes that are neither text nor elements are always
/// removed. For elements:
///
/// - The rich reply tag is removed if replies must be removed.
/// - When filtering is enabled, elements at or beyond `MAX_DEPTH_STRICT` are removed, elements
///   whose tag is not allowed are ignored, and so are elements with an attribute whose URI
///   does not start with one of the schemes allowed for it.
/// - Everything else is kept.
fn node_action(&self, fragment: &Fragment, node_id: usize, depth: u32) -> NodeAction {
    let (name, attrs) = match &fragment.nodes[node_id].data {
        NodeData::Element(ElementData { name, attrs, .. }) => (name, attrs),
        NodeData::Text(_) => return NodeAction::None,
        _ => return NodeAction::Remove,
    };

    let tag: &str = &name.local;
    let is_reply_tag = tag == RICH_REPLY_TAG;

    if (self.remove_replies && is_reply_tag)
        || (self.filter_tags_attributes && depth >= MAX_DEPTH_STRICT)
    {
        return NodeAction::Remove;
    }

    // Without filtering, every remaining element is allowed as is.
    if !self.filter_tags_attributes {
        return NodeAction::None;
    }

    if !is_reply_tag && !ALLOWED_TAGS_WITHOUT_REPLY_STRICT.contains(tag) {
        return NodeAction::Ignore;
    }

    let allowed_schemes = match self.mode {
        HtmlSanitizerMode::Strict => &ALLOWED_SCHEMES_STRICT,
        HtmlSanitizerMode::Compat => &ALLOWED_SCHEMES_COMPAT,
    };

    for attr in attrs {
        let value: &str = &attr.value;
        let attr_name: &str = &attr.name.local;

        // Only the (tag, attribute) pairs that have an entry are restricted to some schemes.
        if let Some(schemes) = allowed_schemes.get(&*format!("{tag}:{attr_name}")) {
            let scheme_allowed = schemes.iter().any(|scheme| {
                let scheme: &str = scheme;
                // The value must start with the scheme immediately followed by a colon.
                value.strip_prefix(scheme).map_or(false, |rest| rest.starts_with(':'))
            });

            if !scheme_allowed {
                return NodeAction::Ignore;
            }
        }
    }

    NodeAction::None
}
/// Filter the attributes of the given element in place.
///
/// Attributes that are not in the allowed list of the element's tag are removed. For an allowed
/// `class` attribute on a tag that has an entry in `ALLOWED_CLASSES_STRICT`, only the classes
/// matching one of the allowed wildcard patterns are kept; the attribute is removed if no class
/// is left.
fn clean_element_attributes(&self, data: &mut ElementData) {
let ElementData { name, attrs } = data;
let tag: &str = &name.local;
// The changes are collected first and applied afterwards, because the set cannot be modified
// while it is being iterated.
let actions: Vec<_> = attrs
.iter()
.filter_map(|attr| {
let value = &attr.value;
let name: &str = &attr.name.local;
// Tags without an entry in the map have no allowed attributes at all.
if ALLOWED_ATTRIBUTES_STRICT.get(tag).filter(|attrs| attrs.contains(name)).is_none()
{
return Some(AttributeAction::Remove(attr.to_owned()));
}
if name == "class" {
if let Some(classes) = ALLOWED_CLASSES_STRICT.get(tag) {
// Set by the filter below as soon as one class is rejected.
let mut changed = false;
let attr_classes = value.split_whitespace().filter(|attr_class| {
for class in classes.iter() {
if WildMatch::new(class).matches(attr_class) {
return true;
}
}
changed = true;
false
});
// The kept classes are joined with a newline; the trailing one is trimmed below.
let folded_classes = attr_classes.fold(String::new(), |mut a, b| {
a.reserve(b.len() + 1);
a.push_str(b);
a.push('\n');
a
});
let final_classes = folded_classes.trim_end();
// If no class was rejected, the attribute is left untouched.
if changed {
if final_classes.is_empty() {
return Some(AttributeAction::Remove(attr.to_owned()));
} else {
return Some(AttributeAction::ReplaceValue(
attr.to_owned(),
final_classes.to_owned().into(),
));
}
}
}
}
None
})
.collect();
for action in actions {
match action {
AttributeAction::ReplaceValue(attr, value) => {
// Take the attribute out of the set, change its value and put it back.
if let Some(mut attr) = attrs.take(&attr) {
attr.value = value;
attrs.insert(attr);
}
}
AttributeAction::Remove(attr) => {
attrs.remove(&attr);
}
}
}
}
}
/// The possible actions to apply to an element node.
///
/// `Ignore` and `Remove` differ in what happens to the children of the node: `Ignore` only
/// drops the node itself, `Remove` drops the whole subtree.
#[derive(Debug, PartialEq, Eq)]
enum NodeAction {
/// Don't do anything.
None,
/// Remove the element but keep its children.
Ignore,
/// Remove the element and its children.
Remove,
}
/// The possible actions to apply to an attribute of an element.
///
/// Each variant carries a copy of the attribute it applies to, which is used to look the
/// attribute up again in the element's attributes when the action is applied.
#[derive(Debug)]
enum AttributeAction {
/// Replace the value of the attribute with the given value.
ReplaceValue(Attribute, StrTendril),
/// Remove the attribute.
Remove(Attribute),
}
/// List of HTML tags allowed in the Matrix specification, without the rich reply fallback tag.
///
/// The rich reply fallback tag is kept apart in `RICH_REPLY_TAG` so it can be handled on its
/// own.
static ALLOWED_TAGS_WITHOUT_REPLY_STRICT: Set<&str> = phf_set! {
"font", "del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "strike",
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
"caption", "pre", "span", "img", "details", "summary",
};
/// The HTML tag name for a rich reply fallback.
const RICH_REPLY_TAG: &str = "mx-reply";
/// Allowed attributes per HTML tag according to the Matrix specification.
///
/// The key is the tag name and the value is the set of attribute names allowed on that tag.
/// Tags that have no entry in this map are not allowed to carry any attribute.
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
"font" => &ALLOWED_ATTRIBUTES_FONT_STRICT,
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
};
// Attributes allowed on `font` tags.
static ALLOWED_ATTRIBUTES_FONT_STRICT: Set<&str> =
phf_set! { "data-mx-bg-color", "data-mx-color", "color" };
// Attributes allowed on `span` tags.
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler" };
// Attributes allowed on `a` tags.
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "name", "target", "href" };
// Attributes allowed on `img` tags.
static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
phf_set! { "width", "height", "alt", "title", "src" };
// Attributes allowed on `ol` tags.
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
// Attributes allowed on `code` tags.
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
/// Allowed schemes of URIs per HTML tag and attribute tuple according to the Matrix specification.
///
/// The key has the form `tag:attribute` and the value is the set of URI schemes, without the
/// trailing `:`, that the value of that attribute is allowed to start with. Tag and attribute
/// tuples without an entry are not restricted to any scheme.
static ALLOWED_SCHEMES_STRICT: Map<&str, &Set<&str>> = phf_map! {
"a:href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
};
// Schemes allowed for the `href` attribute of `a` tags.
static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
phf_set! { "http", "https", "ftp", "mailto", "magnet" };
// Schemes allowed for the `src` attribute of `img` tags.
static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
/// Allowed schemes of URIs per HTML tag and attribute tuple in compat mode.
///
/// This is a complete list that is used instead of `ALLOWED_SCHEMES_STRICT`, not in addition to
/// it. It contains every scheme of the strict list, plus schemes that can be encountered but are
/// not listed in the Matrix specification:
///
/// * The `matrix` scheme for `a` tags (see [matrix-org/matrix-spec#1108]).
///
/// [matrix-org/matrix-spec#1108]: https://github.com/matrix-org/matrix-spec/issues/1108
static ALLOWED_SCHEMES_COMPAT: Map<&str, &Set<&str>> = phf_map! {
"a:href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
"img:src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
};
// Schemes allowed for the `href` attribute of `a` tags: the strict list plus `matrix`.
static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> =
phf_set! { "http", "https", "ftp", "mailto", "magnet", "matrix" };
/// Allowed classes per HTML tag according to the Matrix specification.
///
/// The key is the tag name and the value is a set of patterns that the classes of that tag are
/// matched against with `WildMatch`.
static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
// Class patterns allowed on `code` tags.
static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
/// Max depth of nested HTML tags allowed by the Matrix specification.
const MAX_DEPTH_STRICT: u32 = 100;
#[cfg(test)]
mod tests {
use super::{HtmlSanitizer, HtmlSanitizerMode, RemoveReplyFallback};
/// HTML that only uses allowed tags, attributes, schemes and classes comes out unchanged, even
/// when an escaped `mx-reply` appears as text inside a `code` element.
#[test]
fn valid_input() {
    let html = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\">&lt;mx-reply&gt;This is a fake reply&lt;/mx-reply&gt;</code>\
        ";
    let expected = "\
        <ul><li>This</li><li>has</li><li>no</li><li>tag</li></ul>\
        <p>This is a paragraph <span data-mx-color=\"green\">with some color</span></p>\
        <img src=\"mxc://notareal.hs/abcdef\">\
        <code class=\"language-html\">&lt;mx-reply&gt;This is a fake reply&lt;/mx-reply&gt;</code>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// With `RemoveReplyFallback::No`, the rich reply fallback is kept while a tag that is not
/// allowed is dropped and its text content preserved.
#[test]
fn tags_remove() {
    let html = "\
        <mx-reply>\
        <blockquote>\
        <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
        <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
        <br>\
        Previous message\
        </blockquote>\
        </mx-reply>\
        <removed>This has no tag</removed>\
        <p>But this is inside a tag</p>\
        ";
    let expected = "\
        <mx-reply>\
        <blockquote>\
        <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
        <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
        <br>\
        Previous message\
        </blockquote>\
        </mx-reply>\
        This has no tag\
        <p>But this is inside a tag</p>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// With `RemoveReplyFallback::Yes`, the rich reply fallback disappears together with its
/// content, and a tag that is not allowed is dropped while its text content is preserved.
#[test]
fn tags_remove_without_reply() {
    let html = "\
        <mx-reply>\
        <blockquote>\
        <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
        <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
        <br>\
        Previous message\
        </blockquote>\
        </mx-reply>\
        <removed>This has no tag</removed>\
        <p>But this is inside a tag</p>\
        ";
    let expected = "\
        This has no tag\
        <p>But this is inside a tag</p>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// The sanitizer returned by `reply_fallback_remover()` strips the rich reply fallback and
/// leaves everything else alone, including tags that are not in the allowed list.
#[test]
fn tags_remove_only_reply_fallback() {
    let html = "\
        <mx-reply>\
        <blockquote>\
        <a href=\"https://matrix.to/#/!n8f893n9:example.com/$1598361704261elfgc:localhost\">In reply to</a> \
        <a href=\"https://matrix.to/#/@alice:example.com\">@alice:example.com</a>\
        <br>\
        Previous message\
        </blockquote>\
        </mx-reply>\
        <keep-me>This keeps its tag</keep-me>\
        <p>But this is inside a tag</p>\
        ";
    let expected = "\
        <keep-me>This keeps its tag</keep-me>\
        <p>But this is inside a tag</p>\
        ";

    let cleaner = HtmlSanitizer::reply_fallback_remover();
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// Attributes that are not allowed for their tag (`id` on `h1`, `class` on `p`, `size` on
/// `font`) are removed, while an allowed one (`color` on `font`) is kept.
#[test]
fn attrs_remove() {
    let html = "\
        <h1 id=\"anchor1\">Title for important stuff</h1>\
        <p class=\"important\">Look at <font color=\"blue\" size=20>me!</font></p>\
        ";
    let expected = "\
        <h1>Title for important stuff</h1>\
        <p>Look at <font color=\"blue\">me!</font></p>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// An `img` whose `src` does not use an allowed scheme (here `https` instead of `mxc`) is
/// dropped from the output.
#[test]
fn img_remove_scheme() {
    let html = "\
        <p>Look at that picture:</p>\
        <img src=\"https://notareal.hs/abcdef\">\
        ";
    let expected = "\
        <p>Look at that picture:</p>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// A link whose `href` does not use an allowed scheme (here `file`) loses its `a` tag but
/// keeps its text.
#[test]
fn link_remove_scheme() {
    let html = "\
        <p>Go see <a href=\"file://local/file.html\">my local website</a></p>\
        ";
    let expected = "\
        <p>Go see my local website</p>\
        ";

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let output = cleaner.clean(html);

    assert_eq!(output, expected);
}
/// A `matrix:` link is stripped in strict mode but kept in compat mode; an `https:` link is
/// kept in both.
#[test]
fn link_compat_scheme() {
    // The same input is run through both modes.
    let html = "\
        <p>Join <a href=\"matrix:r/myroom:notareal.hs\">my room</a></p>\
        <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
        ";

    let strict = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let strict_output = strict.clean(html);
    assert_eq!(
        strict_output,
        "\
        <p>Join my room</p>\
        <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
        "
    );

    let compat = HtmlSanitizer::new(HtmlSanitizerMode::Compat, RemoveReplyFallback::No);
    let compat_output = compat.clean(html);
    assert_eq!(
        compat_output,
        "\
        <p>Join <a href=\"matrix:r/myroom:notareal.hs\">my room</a></p>\
        <p>To talk about <a href=\"https://mycat.org\">my cat</a></p>\
        "
    );
}
/// Classes that don't match an allowed pattern are removed from `code` tags: a value that mixes
/// allowed and disallowed classes keeps only the allowed ones, and a value left without any
/// class loses the whole `class` attribute.
#[test]
fn class_remove() {
let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
// Note: the lines inside the `<pre><code>` element end without a `\`, so their line breaks
// (and any leading whitespace on the following line) are part of the string.
let sanitized = sanitizer.clean(
"\
<pre><code class=\"language-rust custom-class\">
type StringList = Vec&lt;String&gt;;
</code></pre>\
<p>What do you think of the name <code class=\"fake-language-rust\">StringList</code>?</p>\
",
);
// `custom-class` is dropped from the first `code`; `fake-language-rust` does not match
// `language-*`, so the second `code` ends up without a `class` attribute.
assert_eq!(
sanitized,
"\
<pre><code class=\"language-rust\">
type StringList = Vec&lt;String&gt;;
</code></pre>\
<p>What do you think of the name <code>StringList</code>?</p>\
"
);
}
/// Content nested below 100 levels of `div` is cut: the `span` element and its text are gone,
/// while the text that sits directly in the innermost `div` survives.
#[test]
fn depth_remove() {
    let nesting = 100;

    let mut deeply_nested_html = "<div>".repeat(nesting);
    deeply_nested_html.push_str(
        "<span>I am in too deep!</span>\
        I should be fine.",
    );
    deeply_nested_html.push_str(&"</div>".repeat(nesting));

    let cleaner = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::No);
    let output = cleaner.clean(&deeply_nested_html);

    assert!(output.contains("I should be fine."));
    assert!(!output.contains("I am in too deep!"));
}
}

View File

@ -488,3 +488,100 @@ fn content_deserialization_failure() {
});
assert_matches!(from_json_value::<RoomMessageEventContent>(json_data), Err(_));
}
/// Checks the reply fallbacks generated by `text_reply_html` for a chain of replies when the
/// `unstable-sanitize` feature is enabled.
///
/// A first message is replied to by a second message, which is in turn replied to by a final
/// reply. The fallback of the final reply must only quote the second message's own content,
/// without the fallback for the first message that the second message carries.
#[test]
#[cfg(feature = "unstable-sanitize")]
fn reply_sanitize() {
use ruma_common::events::room::message::TextMessageEventContent;
// The original message, which is not a reply.
let first_message = OriginalRoomMessageEvent {
content: RoomMessageEventContent::text_html(
"# This is the first message",
"<h1>This is the first message</h1>",
),
event_id: event_id!("$143273582443PhrSn:example.org").to_owned(),
origin_server_ts: MilliSecondsSinceUnixEpoch(uint!(10_000)),
room_id: room_id!("!testroomid:example.org").to_owned(),
sender: user_id!("@user:example.org").to_owned(),
unsigned: MessageLikeUnsigned::default(),
};
// A reply to the first message; its content carries a fallback quoting the first message.
let second_message = OriginalRoomMessageEvent {
content: RoomMessageEventContent::text_reply_html(
"This is the _second_ message",
"This is the <em>second</em> message",
&first_message,
),
event_id: event_id!("$143273582443PhrSn:example.org").to_owned(),
origin_server_ts: MilliSecondsSinceUnixEpoch(uint!(10_000)),
room_id: room_id!("!testroomid:example.org").to_owned(),
sender: user_id!("@user:example.org").to_owned(),
unsigned: MessageLikeUnsigned::default(),
};
// A reply to the reply.
let final_reply = RoomMessageEventContent::text_reply_html(
"This is **my** reply",
"This is <strong>my</strong> reply",
&second_message,
);
// The first message is left untouched. The message type is moved out of the event here, which
// is fine since the event is no longer borrowed.
let (body, formatted) = assert_matches!(
first_message.content.msgtype,
MessageType::Text(TextMessageEventContent { body, formatted, .. }) => (body, formatted)
);
assert_eq!(body, "# This is the first message");
let formatted = formatted.unwrap();
assert_eq!(formatted.body, "<h1>This is the first message</h1>");
// The second message quotes the first one in both its plain text and HTML bodies.
let (body, formatted) = assert_matches!(
second_message.content.msgtype,
MessageType::Text(TextMessageEventContent { body, formatted, .. }) => (body, formatted)
);
assert_eq!(
body,
"\
> <@user:example.org> # This is the first message\n\
This is the _second_ message\
"
);
let formatted = formatted.unwrap();
assert_eq!(
formatted.body,
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!testroomid:example.org/$143273582443PhrSn:example.org\">In reply to</a> \
<a href=\"https://matrix.to/#/@user:example.org\">@user:example.org</a>\
<br>\
<h1>This is the first message</h1>\
</blockquote>\
</mx-reply>\
This is the <em>second</em> message\
"
);
// The final reply only quotes the second message's own content: neither the quoted line of the
// plain text body nor the `<mx-reply>` element of the HTML body is repeated in its fallback.
let (body, formatted) = assert_matches!(
final_reply.msgtype,
MessageType::Text(TextMessageEventContent { body, formatted, .. }) => (body, formatted)
);
assert_eq!(
body,
"\
> <@user:example.org> This is the _second_ message\n\
This is **my** reply\
"
);
let formatted = formatted.unwrap();
assert_eq!(
formatted.body,
"\
<mx-reply>\
<blockquote>\
<a href=\"https://matrix.to/#/!testroomid:example.org/$143273582443PhrSn:example.org\">In reply to</a> \
<a href=\"https://matrix.to/#/@user:example.org\">@user:example.org</a>\
<br>\
This is the <em>second</em> message\
</blockquote>\
</mx-reply>\
This is <strong>my</strong> reply\
"
);
}

View File

@ -116,6 +116,7 @@ unstable-pre-spec = [
"ruma-federation-api?/unstable-pre-spec",
"ruma-push-gateway-api?/unstable-pre-spec",
]
unstable-sanitize = ["ruma-common/unstable-sanitize"]
unstable-msc1767 = ["ruma-common/unstable-msc1767"]
unstable-msc2246 = ["ruma-client-api?/unstable-msc2246"]
unstable-msc2448 = [
@ -157,6 +158,7 @@ unstable-msc3723 = ["ruma-federation-api?/unstable-msc3723"]
__ci = [
"full",
"unstable-pre-spec",
"unstable-sanitize",
"unstable-msc1767",
"unstable-msc2448",
"unstable-msc2666",

View File

@ -37,7 +37,6 @@
//!
//! These features are only useful if you want to use a method that requires it:
//!
//! * `either`
//! * `rand`
//! * `markdown`
//!
@ -51,6 +50,8 @@
//! * `unstable-mscXXXX`, where `XXXX` is the MSC number -- Upcoming Matrix features that may be
//! subject to change or removal.
//! * `unstable-pre-spec` -- Undocumented Matrix features that may be subject to change or removal.
//! * `unstable-sanitize` -- Convenience methods for spec-compliant HTML sanitization that have not
//! been thoroughly tested.
//!
//! # Common features
//!