From a710229d9d3e31430ccffeaa68a7316367382e28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=A9vin=20Commaille?=
Date: Wed, 19 Oct 2022 17:21:07 +0200
Subject: [PATCH] events: Fix markdown content detection

Encoded HTML entities would be detected as a change.
Use the parsed events instead to check if markdown syntax was detected.
---
 crates/ruma-common/src/events/message.rs      |  7 +--
 crates/ruma-common/src/events/room/message.rs | 47 ++++++++++++++++---
 .../ruma-common/tests/events/room_message.rs  | 22 +++++++++
 3 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/crates/ruma-common/src/events/message.rs b/crates/ruma-common/src/events/message.rs
index 7c129790..9351d3ad 100644
--- a/crates/ruma-common/src/events/message.rs
+++ b/crates/ruma-common/src/events/message.rs
@@ -238,12 +238,9 @@ impl Text {
     /// Returns `None` if no Markdown formatting was found.
     #[cfg(feature = "markdown")]
     pub fn markdown(body: impl AsRef<str>) -> Option<Self> {
-        let body = body.as_ref();
-        let mut html_body = String::new();
+        use super::room::message::parse_markdown;
 
-        pulldown_cmark::html::push_html(&mut html_body, pulldown_cmark::Parser::new(body));
-
-        (html_body != format!("<p>{}</p>\n", body)).then(|| Self::html(html_body))
+        parse_markdown(body.as_ref()).map(Self::html)
     }
 
     fn default_mimetype() -> String {
diff --git a/crates/ruma-common/src/events/room/message.rs b/crates/ruma-common/src/events/room/message.rs
index 21816aeb..299a422c 100644
--- a/crates/ruma-common/src/events/room/message.rs
+++ b/crates/ruma-common/src/events/room/message.rs
@@ -592,12 +592,7 @@ impl FormattedBody {
     /// Returns `None` if no Markdown formatting was found.
     #[cfg(feature = "markdown")]
     pub fn markdown(body: impl AsRef<str>) -> Option<Self> {
-        let body = body.as_ref();
-        let mut html_body = String::new();
-
-        pulldown_cmark::html::push_html(&mut html_body, pulldown_cmark::Parser::new(body));
-
-        (html_body != format!("<p>{}</p>\n", body)).then(|| Self::html(html_body))
+        parse_markdown(body.as_ref()).map(Self::html)
     }
 
     /// Sanitize this `FormattedBody` if its format is `MessageFormat::Html`.
@@ -636,3 +631,43 @@ pub struct CustomEventContent {
     #[serde(flatten)]
     data: JsonObject,
 }
+
+#[cfg(feature = "markdown")]
+pub(crate) fn parse_markdown(text: &str) -> Option<String> {
+    use pulldown_cmark::{Event, Parser, Tag};
+
+    let mut found_first_paragraph = false;
+
+    let has_markdown = Parser::new(text).any(|ref event| {
+        let is_text = matches!(event, Event::Text(_));
+        let is_break = matches!(event, Event::SoftBreak | Event::HardBreak);
+        let is_first_paragraph_start = if matches!(event,
+            Event::Start(tag)
+            if matches!(tag, Tag::Paragraph)
+        ) {
+            if found_first_paragraph {
+                false
+            } else {
+                found_first_paragraph = true;
+                true
+            }
+        } else {
+            false
+        };
+        let is_paragraph_end = matches!(event,
+            Event::End(tag)
+            if matches!(tag, Tag::Paragraph)
+        );
+
+        !is_text && !is_break && !is_first_paragraph_start && !is_paragraph_end
+    });
+
+    if !has_markdown {
+        return None;
+    }
+
+    let mut html_body = String::new();
+    pulldown_cmark::html::push_html(&mut html_body, Parser::new(text));
+
+    Some(html_body)
+}
diff --git a/crates/ruma-common/tests/events/room_message.rs b/crates/ruma-common/tests/events/room_message.rs
index 1cbe66d8..255a7777 100644
--- a/crates/ruma-common/tests/events/room_message.rs
+++ b/crates/ruma-common/tests/events/room_message.rs
@@ -302,6 +302,28 @@ fn markdown_content_serialization() {
     );
 }
 
+#[test]
+#[cfg(feature = "markdown")]
+fn markdown_detection() {
+    use ruma_common::events::room::message::FormattedBody;
+
+    // No markdown
+    let formatted_body = FormattedBody::markdown("A simple message.");
+    assert_matches!(formatted_body, None);
+
+    // Multiple paragraphs trigger markdown
+    let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs");
+    formatted_body.unwrap();
+
+    // HTML entities don't trigger markdown.
+    let formatted_body = FormattedBody::markdown("A message with & HTML < entities");
+    assert_matches!(formatted_body, None);
+
+    // HTML triggers markdown.
+    let formatted_body = FormattedBody::markdown("<span>An HTML message</span>");
+    formatted_body.unwrap();
+}
+
 #[test]
 fn verification_request_deserialization() {
     let user_id = user_id!("@example2:localhost");