diff --git a/Cargo.toml b/Cargo.toml index 02c47ab5..4ca18bba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ ruma-common = { version = "0.11.3", path = "crates/ruma-common" } ruma-client = { version = "0.11.0", path = "crates/ruma-client" } ruma-client-api = { version = "0.16.2", path = "crates/ruma-client-api" } ruma-federation-api = { version = "0.7.1", path = "crates/ruma-federation-api" } +ruma-html = { version = "0.1.0", path = "crates/ruma-html" } ruma-identifiers-validation = { version = "0.9.1", path = "crates/ruma-identifiers-validation" } ruma-identity-service-api = { version = "0.7.1", path = "crates/ruma-identity-service-api" } ruma-macros = { version = "=0.11.3", path = "crates/ruma-macros" } diff --git a/crates/ruma-common/CHANGELOG.md b/crates/ruma-common/CHANGELOG.md index 24a8f406..e5a31abe 100644 --- a/crates/ruma-common/CHANGELOG.md +++ b/crates/ruma-common/CHANGELOG.md @@ -41,6 +41,8 @@ Breaking changes: - `RoomMessageEventContent::make_reply_to()` and `make_for_thread()` have an extra parameter to support the recommended behavior for intentional mentions in replies according to Matrix 1.7 - In Markdown, soft line breaks are transformed into hard line breaks when compiled into HTML. 
+- Move the HTML functions in `events::room::message::sanitize` to the ruma-html crate + - The `unstable-sanitize` cargo feature was renamed to `html` Improvements: @@ -62,7 +64,7 @@ Improvements: - `user_can_send_message` - `user_can_send_state` - `user_can_trigger_room_notification` -- Add `MessageType::sanitize` behind the `unstable-sanitize` feature +- Add `MessageType::sanitize` behind the `html` feature - Add `MatrixVersion::V1_7` and `MatrixVersion::V1_8` - Stabilize support for annotations and reactions (MSC2677 / Matrix 1.7) - Add support for intentional mentions push rules (MSC3952 / Matrix 1.7) @@ -275,7 +277,7 @@ Improvements: * Deserialize stringified integers for power levels without the `compat` feature * Add `JoinRule::KnockRestricted` (MSC3787) * Add `MatrixVersionId::V10` (MSC3604) -* Add methods to sanitize messages according to the spec behind the `unstable-sanitize` feature +* Add methods to sanitize messages according to the spec behind the `html` feature * Can also remove rich reply fallbacks * Implement `From` for `identifiers::matrix_uri::MatrixId` * Add unstable default push rule to ignore room server ACLs events (MSC3786) diff --git a/crates/ruma-common/Cargo.toml b/crates/ruma-common/Cargo.toml index c73d036e..55443a98 100644 --- a/crates/ruma-common/Cargo.toml +++ b/crates/ruma-common/Cargo.toml @@ -24,6 +24,7 @@ server = [] api = ["dep:http", "dep:konst"] canonical-json = [] events = [] +html = ["dep:ruma-html"] js = ["dep:js-sys", "getrandom?/js", "uuid?/js"] markdown = ["pulldown-cmark"] rand = ["dep:rand", "dep:uuid"] @@ -47,7 +48,6 @@ unstable-msc3954 = ["unstable-msc1767"] unstable-msc3955 = ["unstable-msc1767"] unstable-msc3956 = ["unstable-msc1767"] unstable-pdu = [] -unstable-sanitize = ["dep:html5ever", "dep:phf"] unstable-unspecified = [] # Don't validate the version part in `KeyId`. 
@@ -77,17 +77,16 @@ base64 = { workspace = true } bytes = "1.0.1" form_urlencoded = "1.0.0" getrandom = { version = "0.2.6", optional = true } -html5ever = { version = "0.26.0", optional = true } http = { workspace = true, optional = true } indexmap = { version = "2.0.0", features = ["serde"] } js_int = { workspace = true, features = ["serde"] } js_option = "0.1.0" konst = { version = "0.3.5", default-features = false, features = ["cmp", "iter", "parsing"], optional = true } percent-encoding = "2.1.0" -phf = { version = "0.11.1", features = ["macros"], optional = true } pulldown-cmark = { version = "0.9.1", default-features = false, optional = true } rand = { version = "0.8.3", optional = true } regex = { version = "1.5.6", default-features = false, features = ["std", "perf"] } +ruma-html = { workspace = true, optional = true } ruma-identifiers-validation = { workspace = true } ruma-macros = { workspace = true } serde = { workspace = true } diff --git a/crates/ruma-common/src/doc/rich_reply.md b/crates/ruma-common/src/doc/rich_reply.md index a25b14c7..f0bec908 100644 --- a/crates/ruma-common/src/doc/rich_reply.md +++ b/crates/ruma-common/src/doc/rich_reply.md @@ -7,7 +7,7 @@ the previous message, for which the room ID is required. If you want to reply to If the message was edited, the previous message should be the original message that was edited, with the content of its replacement, to allow the fallback to be accurate at the time it is added. -It is recommended to enable the `unstable-sanitize` feature when using this method as this will +It is recommended to enable the `html` feature when using this method as this will clean up nested [rich reply fallbacks] in chains of replies. This uses [`sanitize_html()`] internally, with [`RemoveReplyFallback::Yes`]. 
diff --git a/crates/ruma-common/src/events/room/message.rs b/crates/ruma-common/src/events/room/message.rs index bed1172e..0378c8f6 100644 --- a/crates/ruma-common/src/events/room/message.rs +++ b/crates/ruma-common/src/events/room/message.rs @@ -4,6 +4,8 @@ use std::borrow::Cow; +#[cfg(feature = "html")] +use ruma_html::{sanitize_html, HtmlSanitizerMode, RemoveReplyFallback}; use ruma_macros::EventContent; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use serde_json::Value as JsonValue; @@ -40,10 +42,8 @@ pub use key_verification_request::KeyVerificationRequestEventContent; pub use location::{LocationInfo, LocationMessageEventContent}; pub use notice::NoticeMessageEventContent; pub use relation_serde::deserialize_relation; -#[cfg(feature = "unstable-sanitize")] -use sanitize::{ - remove_plain_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback, -}; +#[cfg(feature = "html")] +use sanitize::remove_plain_reply_fallback; pub use server_notice::{LimitType, ServerNoticeMessageEventContent, ServerNoticeType}; pub use text::TextMessageEventContent; pub use video::{VideoInfo, VideoMessageEventContent}; @@ -432,7 +432,7 @@ impl RoomMessageEventContent { /// /// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes /// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] pub fn sanitize( &mut self, mode: HtmlSanitizerMode, @@ -758,7 +758,7 @@ impl MessageType { /// /// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes /// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] pub fn sanitize( &mut self, mode: HtmlSanitizerMode, @@ -907,7 +907,7 @@ impl FormattedBody { /// /// [tags and attributes]: 
https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes /// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] pub fn sanitize_html( &mut self, mode: HtmlSanitizerMode, diff --git a/crates/ruma-common/src/events/room/message/reply.rs b/crates/ruma-common/src/events/room/message/reply.rs index 270cf4fd..5af53af4 100644 --- a/crates/ruma-common/src/events/room/message/reply.rs +++ b/crates/ruma-common/src/events/room/message/reply.rs @@ -1,11 +1,12 @@ use std::fmt::{self, Write}; +#[cfg(feature = "html")] +use ruma_html::{HtmlSanitizer, HtmlSanitizerMode, RemoveReplyFallback}; + use super::{ sanitize::remove_plain_reply_fallback, FormattedBody, MessageType, OriginalRoomMessageEvent, Relation, }; -#[cfg(feature = "unstable-sanitize")] -use super::{sanitize::HtmlSanitizer, HtmlSanitizerMode, RemoveReplyFallback}; fn get_message_quote_fallbacks(original_message: &OriginalRoomMessageEvent) -> (String, String) { let get_quotes = |body: &str, formatted: Option<&FormattedBody>, is_emote: bool| { @@ -13,9 +14,9 @@ fn get_message_quote_fallbacks(original_message: &OriginalRoomMessageEvent) -> ( let is_reply = matches!(content.relates_to, Some(Relation::Reply { .. 
})); let emote_sign = is_emote.then_some("* ").unwrap_or_default(); let body = is_reply.then(|| remove_plain_reply_fallback(body)).unwrap_or(body); - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] let html_body = FormattedOrPlainBody { formatted, body, is_reply }; - #[cfg(not(feature = "unstable-sanitize"))] + #[cfg(not(feature = "html"))] let html_body = FormattedOrPlainBody { formatted, body }; ( @@ -72,14 +73,14 @@ impl fmt::Display for EscapeHtmlEntities<'_> { struct FormattedOrPlainBody<'a> { formatted: Option<&'a FormattedBody>, body: &'a str, - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] is_reply: bool, } impl fmt::Display for FormattedOrPlainBody<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(formatted_body) = self.formatted { - #[cfg(feature = "unstable-sanitize")] + #[cfg(feature = "html")] if self.is_reply { let sanitizer = HtmlSanitizer::new(HtmlSanitizerMode::Strict, RemoveReplyFallback::Yes); @@ -88,7 +89,7 @@ impl fmt::Display for FormattedOrPlainBody<'_> { f.write_str(&formatted_body.body) } - #[cfg(not(feature = "unstable-sanitize"))] + #[cfg(not(feature = "html"))] f.write_str(&formatted_body.body) } else { write!(f, "{}", EscapeHtmlEntities(self.body)) diff --git a/crates/ruma-common/src/events/room/message/sanitize.rs b/crates/ruma-common/src/events/room/message/sanitize.rs index d2e5c64f..5bfc4508 100644 --- a/crates/ruma-common/src/events/room/message/sanitize.rs +++ b/crates/ruma-common/src/events/room/message/sanitize.rs @@ -1,52 +1,9 @@ //! Convenience methods and types to sanitize text messages. -#![allow(unreachable_pub)] // https://github.com/rust-lang/rust/issues/112615 - -#[cfg(feature = "unstable-sanitize")] -mod html_fragment; -#[cfg(feature = "unstable-sanitize")] -mod html_sanitizer; - -#[cfg(feature = "unstable-sanitize")] -pub(super) use html_sanitizer::HtmlSanitizer; - -/// Sanitize the given HTML string. 
-/// -/// This removes the [tags and attributes] that are not listed in the Matrix specification. -/// -/// It can also optionally remove the [rich reply fallback]. -/// -/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes -/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies -#[cfg(feature = "unstable-sanitize")] -pub fn sanitize_html( - s: &str, - mode: HtmlSanitizerMode, - remove_reply_fallback: RemoveReplyFallback, -) -> String { - let sanitizer = HtmlSanitizer::new(mode, remove_reply_fallback); - sanitizer.clean(s).to_string() -} - -/// What HTML [tags and attributes] should be kept by the sanitizer. -/// -/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes -#[cfg(feature = "unstable-sanitize")] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[allow(clippy::exhaustive_enums)] -pub enum HtmlSanitizerMode { - /// Keep only the tags and attributes listed in the Matrix specification. - Strict, - - /// Like `Strict` mode, with additional tags and attributes that are not yet included in - /// the spec, but are reasonable to keep. - Compat, -} - /// Whether to remove the [rich reply fallback] while sanitizing. /// /// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies -#[cfg(feature = "unstable-sanitize")] +#[cfg(feature = "html")] #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[allow(clippy::exhaustive_enums)] pub enum RemoveReplyFallback { @@ -57,18 +14,6 @@ pub enum RemoveReplyFallback { No, } -/// Remove the [rich reply fallback] of the given HTML string. -/// -/// Due to the fact that the HTML is parsed, note that malformed HTML and comments will be stripped -/// from the output. 
-/// -/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies -#[cfg(feature = "unstable-sanitize")] -pub fn remove_html_reply_fallback(s: &str) -> String { - let sanitizer = HtmlSanitizer::reply_fallback_remover(); - sanitizer.clean(s).to_string() -} - /// Remove the [rich reply fallback] of the given plain text string. /// /// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies @@ -96,103 +41,6 @@ pub fn remove_plain_reply_fallback(mut s: &str) -> &str { #[cfg(test)] mod tests { use super::remove_plain_reply_fallback; - #[cfg(feature = "unstable-sanitize")] - use super::{ - remove_html_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback, - }; - - #[test] - #[cfg(feature = "unstable-sanitize")] - fn sanitize() { - let sanitized = sanitize_html( - "\ - \ -
\ - In reply to \ - @alice:example.com\ -
\ - Previous message\ -
\ -
\ - This has no tag\ -

But this is inside a tag

\ - ", - HtmlSanitizerMode::Strict, - RemoveReplyFallback::No, - ); - - assert_eq!( - sanitized, - "\ - \ -
\ - In reply to \ - @alice:example.com\ -
\ - Previous message\ -
\ -
\ - This has no tag\ -

But this is inside a tag

\ - " - ); - } - - #[test] - #[cfg(feature = "unstable-sanitize")] - fn sanitize_without_reply() { - let sanitized = sanitize_html( - "\ - \ -
\ - In reply to \ - @alice:example.com\ -
\ - Previous message\ -
\ -
\ - This has no tag\ -

But this is inside a tag

\ - ", - HtmlSanitizerMode::Strict, - RemoveReplyFallback::Yes, - ); - - assert_eq!( - sanitized, - "\ - This has no tag\ -

But this is inside a tag

\ - " - ); - } - - #[test] - #[cfg(feature = "unstable-sanitize")] - fn remove_html_reply() { - let without_reply = remove_html_reply_fallback( - "\ - \ -
\ - In reply to \ - @alice:example.com\ -
\ - Previous message\ -
\ -
\ - This keeps its tag\ -

But this is inside a tag

\ - ", - ); - - assert_eq!( - without_reply, - "\ - This keeps its tag\ -

But this is inside a tag

\ - " - ); - } #[test] fn remove_plain_reply() { diff --git a/crates/ruma-common/tests/events/room_message.rs b/crates/ruma-common/tests/events/room_message.rs index 8e867e91..c062ea62 100644 --- a/crates/ruma-common/tests/events/room_message.rs +++ b/crates/ruma-common/tests/events/room_message.rs @@ -349,7 +349,7 @@ fn escape_tags_in_plain_reply_body() { } #[test] -#[cfg(feature = "unstable-sanitize")] +#[cfg(feature = "html")] fn reply_sanitize() { use ruma_common::events::room::message::ForwardThread; diff --git a/crates/ruma-html/CHANGELOG.md b/crates/ruma-html/CHANGELOG.md new file mode 100644 index 00000000..ab8edc0c --- /dev/null +++ b/crates/ruma-html/CHANGELOG.md @@ -0,0 +1,3 @@ +# 0.1.0 (unreleased) + +Initial release diff --git a/crates/ruma-html/Cargo.toml b/crates/ruma-html/Cargo.toml new file mode 100644 index 00000000..600c3546 --- /dev/null +++ b/crates/ruma-html/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "ruma-html" +version = "0.1.0" +description = "Opinionated HTML parsing and manipulating." 
+homepage = "https://www.ruma.io/" +keywords = ["matrix", "chat", "messaging", "ruma", "html", "parser"] +license = "MIT" +readme = "README.md" +repository = "https://github.com/ruma/ruma" +edition = "2021" +rust-version = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +html5ever = "0.26.0" +phf = { version = "0.11.1", features = ["macros"] } +tracing = { workspace = true, features = ["attributes"] } +wildmatch = "2.0.0" diff --git a/crates/ruma-html/README.md b/crates/ruma-html/README.md new file mode 100644 index 00000000..7ee48936 --- /dev/null +++ b/crates/ruma-html/README.md @@ -0,0 +1,12 @@ +# ruma-html + +[![crates.io page](https://img.shields.io/crates/v/ruma-html.svg)](https://crates.io/crates/ruma-html) +[![docs.rs page](https://docs.rs/ruma-html/badge.svg)](https://docs.rs/ruma-html/) +![license: MIT](https://img.shields.io/crates/l/ruma-html.svg) + +Opinionated HTML parsing and manipulating library. + +Like the rest of the Ruma crates, this crate is primarily meant to be used for +the Matrix protocol. It should be able to be used to interact with any HTML +content but will offer APIs focused on specificities of HTML in the Matrix +specification. \ No newline at end of file diff --git a/crates/ruma-common/src/events/room/message/sanitize/html_fragment.rs b/crates/ruma-html/src/html_fragment.rs similarity index 95% rename from crates/ruma-common/src/events/room/message/sanitize/html_fragment.rs rename to crates/ruma-html/src/html_fragment.rs index 94b46caa..fabbd34b 100644 --- a/crates/ruma-common/src/events/room/message/sanitize/html_fragment.rs +++ b/crates/ruma-html/src/html_fragment.rs @@ -13,8 +13,8 @@ use tracing::debug; /// /// To get the serialized HTML, use its `Display` implementation. 
#[derive(Debug)] -pub(crate) struct Fragment { - pub nodes: Vec, +pub struct Fragment { + pub(crate) nodes: Vec, } impl Fragment { @@ -265,13 +265,14 @@ impl fmt::Display for Fragment { /// An HTML node. #[derive(Debug)] -pub(crate) struct Node { - pub parent: Option, - pub prev_sibling: Option, - pub next_sibling: Option, - pub first_child: Option, - pub last_child: Option, - pub data: NodeData, +#[non_exhaustive] +pub struct Node { + pub(crate) parent: Option, + pub(crate) prev_sibling: Option, + pub(crate) next_sibling: Option, + pub(crate) first_child: Option, + pub(crate) last_child: Option, + pub(crate) data: NodeData, } impl Node { @@ -313,7 +314,7 @@ impl Node { } impl Node { - pub fn serialize(&self, fragment: &Fragment, serializer: &mut S) -> io::Result<()> + pub(crate) fn serialize(&self, fragment: &Fragment, serializer: &mut S) -> io::Result<()> where S: Serializer, { @@ -353,7 +354,8 @@ impl Node { /// The data of a `Node`. #[derive(Debug)] -pub(crate) enum NodeData { +#[allow(clippy::exhaustive_enums)] +pub enum NodeData { /// The root node of the `Fragment`. Document, @@ -369,7 +371,8 @@ pub(crate) enum NodeData { /// The data of an HTML element. #[derive(Debug)] -pub(crate) struct ElementData { +#[allow(clippy::exhaustive_structs)] +pub struct ElementData { /// The qualified name of the element. pub name: QualName, diff --git a/crates/ruma-html/src/lib.rs b/crates/ruma-html/src/lib.rs new file mode 100644 index 00000000..3dd6fcee --- /dev/null +++ b/crates/ruma-html/src/lib.rs @@ -0,0 +1,19 @@ +#![doc(html_favicon_url = "https://www.ruma.io/favicon.ico")] +#![doc(html_logo_url = "https://www.ruma.io/images/logo.png")] +//! Opinionated HTML parsing and manipulating library. +//! +//! Like the rest of the Ruma crates, this crate is primarily meant to be used for +//! the Matrix protocol. It should be able to be used to interact with any HTML +//! document but will offer APIs focused on specificities of HTML in the Matrix +//! specification. 
+ +#![warn(missing_docs)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +mod html_fragment; +mod sanitize; + +pub use self::{ + html_fragment::{ElementData, Fragment, Node, NodeData}, + sanitize::*, +}; diff --git a/crates/ruma-html/src/sanitize.rs b/crates/ruma-html/src/sanitize.rs new file mode 100644 index 00000000..e3a7b33b --- /dev/null +++ b/crates/ruma-html/src/sanitize.rs @@ -0,0 +1,157 @@ +//! Convenience methods and types to sanitize HTML messages. + +mod html_sanitizer; + +pub use self::html_sanitizer::HtmlSanitizer; + +/// Sanitize the given HTML string. +/// +/// This removes the [tags and attributes] that are not listed in the Matrix specification. +/// +/// It can also optionally remove the [rich reply fallback]. +/// +/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes +/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies +pub fn sanitize_html( + s: &str, + mode: HtmlSanitizerMode, + remove_reply_fallback: RemoveReplyFallback, +) -> String { + let sanitizer = HtmlSanitizer::new(mode, remove_reply_fallback); + sanitizer.clean(s).to_string() +} + +/// What HTML [tags and attributes] should be kept by the sanitizer. +/// +/// [tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(clippy::exhaustive_enums)] +pub enum HtmlSanitizerMode { + /// Keep only the tags and attributes listed in the Matrix specification. + Strict, + + /// Like `Strict` mode, with additional tags and attributes that are not yet included in + /// the spec, but are reasonable to keep. + Compat, +} + +/// Whether to remove the [rich reply fallback] while sanitizing. 
+/// +/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(clippy::exhaustive_enums)] +pub enum RemoveReplyFallback { + /// Remove the rich reply fallback. + Yes, + + /// Don't remove the rich reply fallback. + No, +} + +/// Remove the [rich reply fallback] of the given HTML string. +/// +/// Due to the fact that the HTML is parsed, note that malformed HTML and comments will be stripped +/// from the output. +/// +/// [rich reply fallback]: https://spec.matrix.org/latest/client-server-api/#fallbacks-for-rich-replies +pub fn remove_html_reply_fallback(s: &str) -> String { + let sanitizer = HtmlSanitizer::reply_fallback_remover(); + sanitizer.clean(s).to_string() +} + +#[cfg(test)] +mod tests { + use super::{ + remove_html_reply_fallback, sanitize_html, HtmlSanitizerMode, RemoveReplyFallback, + }; + + #[test] + fn sanitize() { + let sanitized = sanitize_html( + "\ + \ +
\ + In reply to \ + @alice:example.com\ +
\ + Previous message\ +
\ +
\ + This has no tag\ +

But this is inside a tag

\ + ", + HtmlSanitizerMode::Strict, + RemoveReplyFallback::No, + ); + + assert_eq!( + sanitized, + "\ + \ +
\ + In reply to \ + @alice:example.com\ +
\ + Previous message\ +
\ +
\ + This has no tag\ +

But this is inside a tag

\ + " + ); + } + + #[test] + fn sanitize_without_reply() { + let sanitized = sanitize_html( + "\ + \ +
\ + In reply to \ + @alice:example.com\ +
\ + Previous message\ +
\ +
\ + This has no tag\ +

But this is inside a tag

\ + ", + HtmlSanitizerMode::Strict, + RemoveReplyFallback::Yes, + ); + + assert_eq!( + sanitized, + "\ + This has no tag\ +

But this is inside a tag

\ + " + ); + } + + #[test] + fn remove_html_reply() { + let without_reply = remove_html_reply_fallback( + "\ + \ +
\ + In reply to \ + @alice:example.com\ +
\ + Previous message\ +
\ +
\ + This keeps its tag\ +

But this is inside a tag

\ + ", + ); + + assert_eq!( + without_reply, + "\ + This keeps its tag\ +

But this is inside a tag

\ + " + ); + } +} diff --git a/crates/ruma-common/src/events/room/message/sanitize/html_sanitizer.rs b/crates/ruma-html/src/sanitize/html_sanitizer.rs similarity index 99% rename from crates/ruma-common/src/events/room/message/sanitize/html_sanitizer.rs rename to crates/ruma-html/src/sanitize/html_sanitizer.rs index d32ce366..bfed7076 100644 --- a/crates/ruma-common/src/events/room/message/sanitize/html_sanitizer.rs +++ b/crates/ruma-html/src/sanitize/html_sanitizer.rs @@ -2,16 +2,14 @@ use html5ever::{tendril::StrTendril, Attribute}; use phf::{phf_map, phf_set, Map, Set}; use wildmatch::WildMatch; -use super::{ - html_fragment::{ElementData, Fragment, NodeData}, - HtmlSanitizerMode, RemoveReplyFallback, -}; +use super::{HtmlSanitizerMode, RemoveReplyFallback}; +use crate::{ElementData, Fragment, NodeData}; /// A sanitizer to filter [HTML tags and attributes] according to the Matrix specification. /// /// [HTML tags and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes #[derive(Debug, Clone)] -pub(crate) struct HtmlSanitizer { +pub struct HtmlSanitizer { /// The mode of the HTML sanitizer. mode: HtmlSanitizerMode, diff --git a/crates/ruma/Cargo.toml b/crates/ruma/Cargo.toml index bd324ad9..8c5b81db 100644 --- a/crates/ruma/Cargo.toml +++ b/crates/ruma/Cargo.toml @@ -70,6 +70,7 @@ js = ["ruma-common/js"] # Convenience features rand = ["ruma-common/rand"] markdown = ["ruma-common/markdown"] +html = ["dep:ruma-html", "ruma-common/html"] # Everything except compat, js and unstable features full = [ @@ -86,6 +87,7 @@ full = [ "push-gateway-api", "rand", "markdown", + "html", ] # Enable all compatibility hacks. Deprecated. 
@@ -189,7 +191,6 @@ unstable-msc3954 = ["ruma-common/unstable-msc3954"] unstable-msc3955 = ["ruma-common/unstable-msc3955"] unstable-msc3956 = ["ruma-common/unstable-msc3956"] unstable-pdu = ["ruma-common/unstable-pdu"] -unstable-sanitize = ["ruma-common/unstable-sanitize"] unstable-unspecified = [ "ruma-common/unstable-unspecified", "ruma-federation-api?/unstable-unspecified", @@ -201,7 +202,6 @@ __ci = [ "full", "compat-upload-signatures", "unstable-unspecified", - "unstable-sanitize", "unstable-msc1767", "unstable-msc2409", "unstable-msc2448", @@ -239,6 +239,7 @@ js_option = "0.1.1" ruma-common = { workspace = true } ruma-client = { workspace = true, optional = true } +ruma-html = { workspace = true, optional = true } ruma-server-util = { workspace = true, optional = true } ruma-signatures = { workspace = true, optional = true } ruma-state-res = { workspace = true, optional = true } diff --git a/crates/ruma/src/lib.rs b/crates/ruma/src/lib.rs index 1aef491e..9190cbbc 100644 --- a/crates/ruma/src/lib.rs +++ b/crates/ruma/src/lib.rs @@ -40,6 +40,7 @@ //! //! * `rand` //! * `markdown` +//! * `html` //! //! # Unstable features //! @@ -52,8 +53,6 @@ //! subject to change or removal. //! * `unstable-unspecified` -- Undocumented Matrix features that may be subject to change or //! removal. -//! * `unstable-sanitize` -- Convenience methods for spec-compliant HTML sanitization that have not -//! been thoroughly tested. //! //! # Common features //! @@ -82,6 +81,9 @@ pub use ruma_client as client; #[cfg(feature = "events")] #[doc(inline)] pub use ruma_common::events; +#[cfg(feature = "html")] +#[doc(inline)] +pub use ruma_html as html; #[cfg(feature = "server-util")] #[doc(inline)] pub use ruma_server_util as server_util;