From 278a45aec84c73d6877e5bb6334a891a8c441739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Commaille?= Date: Thu, 24 Oct 2024 20:26:27 +0200 Subject: [PATCH] events: Remove p tag around inline markdown As recommended by the spec. --- crates/ruma-events/CHANGELOG.md | 2 + crates/ruma-events/src/room/message.rs | 71 ++++++++++++++++++--- crates/ruma-events/tests/it/message.rs | 2 +- crates/ruma-events/tests/it/room_message.rs | 20 +++--- 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/crates/ruma-events/CHANGELOG.md b/crates/ruma-events/CHANGELOG.md index c31549a0..3fc1c0b6 100644 --- a/crates/ruma-events/CHANGELOG.md +++ b/crates/ruma-events/CHANGELOG.md @@ -10,6 +10,8 @@ Bug fixes: - `Restricted` no longer fails to deserialize when the `allow` field is missing - Markdown text constructors now also detect markdown syntax like backslash escapes and entity references to decide if the text should be sent as HTML. +- Markdown text with only inline syntax generates HTML that is not wrapped + inside a `

` element anymore, as recommended by the spec. Improvements: diff --git a/crates/ruma-events/src/room/message.rs b/crates/ruma-events/src/room/message.rs index 2f1d016e..dce23958 100644 --- a/crates/ruma-events/src/room/message.rs +++ b/crates/ruma-events/src/room/message.rs @@ -915,46 +915,97 @@ pub(crate) fn parse_markdown(text: &str) -> Option { return None; } + // If the markdown contains only inline tags, i.e. if there is only one paragraph, remove the + // wrapping paragraph, as instructed by the spec. + let first_event_is_paragraph_start = + parser_events.first().is_some_and(|event| matches!(event, Event::Start(Tag::Paragraph))); + let last_event_is_paragraph_end = + parser_events.last().is_some_and(|event| matches!(event, Event::End(TagEnd::Paragraph))); + let mut has_several_blocks = true; + + if first_event_is_paragraph_start && last_event_is_paragraph_end { + has_several_blocks = parser_events.iter().skip(1).any(|event| { + if let Event::Start(tag) = event { + is_block_tag(tag) + } else { + false + } + }); + } + + let mut events_iter = parser_events.into_iter(); + if !has_several_blocks { + events_iter.next(); + events_iter.next_back(); + } + let mut html_body = String::new(); - pulldown_cmark::html::push_html(&mut html_body, parser_events.into_iter()); + pulldown_cmark::html::push_html(&mut html_body, events_iter); Some(html_body) } +/// Whether the given tag is a block HTML element. +#[cfg(feature = "markdown")] +fn is_block_tag(tag: &pulldown_cmark::Tag<'_>) -> bool { + use pulldown_cmark::Tag; + + matches!( + tag, + Tag::Paragraph + | Tag::Heading { .. } + | Tag::BlockQuote(_) + | Tag::CodeBlock(_) + | Tag::HtmlBlock + | Tag::List(_) + | Tag::FootnoteDefinition(_) + | Tag::Table(_) + ) +} + #[cfg(all(test, feature = "markdown"))] mod tests { - use assert_matches2::assert_matches; - use super::parse_markdown; #[test] fn detect_markdown() { // Simple single-line text. let text = "Hello world."; - assert_matches!(parse_markdown(text), None); + assert_eq!(parse_markdown(text), None); // Simple double-line text. let text = "Hello\nworld."; - assert_matches!(parse_markdown(text), None); + assert_eq!(parse_markdown(text), None); // With new paragraph. let text = "Hello\n\nworld."; - assert_matches!(parse_markdown(text), Some(_)); + assert_eq!(parse_markdown(text).as_deref(), Some("

Hello

\n

world.

\n")); + + // With heading and paragraph. + let text = "## Hello\n\nworld."; + assert_eq!(parse_markdown(text).as_deref(), Some("

Hello

\n

world.

\n")); + + // With paragraph and code block. + let text = "Hello\n\n```\nworld.\n```"; + assert_eq!( + parse_markdown(text).as_deref(), + Some("

Hello

\n
world.\n
\n") + ); // With tagged element. let text = "Hello **world**."; - assert_matches!(parse_markdown(text), Some(_)); + assert_eq!(parse_markdown(text).as_deref(), Some("Hello world.")); // With backslash escapes. let text = r#"Hello \."#; - assert_matches!(parse_markdown(text), Some(_)); + assert_eq!(parse_markdown(text).as_deref(), Some("Hello <world>.")); // With entity reference. let text = r#"Hello <world>."#; - assert_matches!(parse_markdown(text), Some(_)); + assert_eq!(parse_markdown(text).as_deref(), Some("Hello <world>.")); // With numeric reference. let text = "Hello w⊕rld."; - assert_matches!(parse_markdown(text), Some(_)); + assert_eq!(parse_markdown(text).as_deref(), Some("Hello w⊕rld.")); } } diff --git a/crates/ruma-events/tests/it/message.rs b/crates/ruma-events/tests/it/message.rs index 05112579..8007d6d3 100644 --- a/crates/ruma-events/tests/it/message.rs +++ b/crates/ruma-events/tests/it/message.rs @@ -82,7 +82,7 @@ fn markdown_content_serialization() { "org.matrix.msc1767.text": [ { "mimetype": "text/html", - "body": "

Testing bold and italic!

\n", + "body": "Testing bold and italic!", }, { "body": "Testing **bold** and _italic_!", diff --git a/crates/ruma-events/tests/it/room_message.rs b/crates/ruma-events/tests/it/room_message.rs index febac08c..24f1f41f 100644 --- a/crates/ruma-events/tests/it/room_message.rs +++ b/crates/ruma-events/tests/it/room_message.rs @@ -117,7 +117,7 @@ fn text_msgtype_markdown_serialization() { to_json_value(&formatted_message).unwrap(), json!({ "body": text, - "formatted_body": "

Testing bold and italic!

\n", + "formatted_body": "Testing bold and italic!", "format": "org.matrix.custom.html", "msgtype": "m.text" }) @@ -198,16 +198,20 @@ fn markdown_detection() { assert_matches!(formatted_body, None); // Multiple paragraphs trigger markdown - let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs"); - formatted_body.unwrap(); + let formatted_body = + FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs").unwrap(); + assert_eq!( + formatted_body.body, + "

A message
\nwith

\n

multiple

\n

paragraphs

\n" + ); // "Less than" symbol triggers markdown. - let formatted_body = FormattedBody::markdown("A message with & HTML < entities"); - assert_matches!(formatted_body, Some(_)); + let formatted_body = FormattedBody::markdown("A message with & HTML < entities").unwrap(); + assert_eq!(formatted_body.body, "A message with & HTML < entities"); // HTML triggers markdown. - let formatted_body = FormattedBody::markdown("An HTML message"); - formatted_body.unwrap(); + let formatted_body = FormattedBody::markdown("An HTML message").unwrap(); + assert_eq!(formatted_body.body, "An HTML message"); } #[test] @@ -232,7 +236,7 @@ fn markdown_options() { // Strikethrough let formatted_body = FormattedBody::markdown("A message with a ~~strike~~"); - assert_eq!(formatted_body.unwrap().body, "

A message with a strike

\n"); + assert_eq!(formatted_body.unwrap().body, "A message with a strike"); } #[test]