events: Remove p tag around inline markdown

As recommended by the spec.
This commit is contained in:
Kévin Commaille 2024-10-24 20:26:27 +02:00 committed by strawberry
parent c3fb396352
commit 278a45aec8
4 changed files with 76 additions and 19 deletions

View File

@ -10,6 +10,8 @@ Bug fixes:
- `Restricted` no longer fails to deserialize when the `allow` field is missing - `Restricted` no longer fails to deserialize when the `allow` field is missing
- Markdown text constructors now also detect markdown syntax like backslash - Markdown text constructors now also detect markdown syntax like backslash
escapes and entity references to decide if the text should be sent as HTML. escapes and entity references to decide if the text should be sent as HTML.
- Markdown text with only inline syntax generates HTML that is not wrapped
inside a `<p>` element anymore, as recommended by the spec.
Improvements: Improvements:

View File

@ -915,46 +915,97 @@ pub(crate) fn parse_markdown(text: &str) -> Option<String> {
return None; return None;
} }
// If the markdown contains only inline tags, i.e. if there is only one paragraph, remove the
// wrapping paragraph, as instructed by the spec.
let first_event_is_paragraph_start =
parser_events.first().is_some_and(|event| matches!(event, Event::Start(Tag::Paragraph)));
let last_event_is_paragraph_end =
parser_events.last().is_some_and(|event| matches!(event, Event::End(TagEnd::Paragraph)));
let mut has_several_blocks = true;
if first_event_is_paragraph_start && last_event_is_paragraph_end {
has_several_blocks = parser_events.iter().skip(1).any(|event| {
if let Event::Start(tag) = event {
is_block_tag(tag)
} else {
false
}
});
}
let mut events_iter = parser_events.into_iter();
if !has_several_blocks {
events_iter.next();
events_iter.next_back();
}
let mut html_body = String::new(); let mut html_body = String::new();
pulldown_cmark::html::push_html(&mut html_body, parser_events.into_iter()); pulldown_cmark::html::push_html(&mut html_body, events_iter);
Some(html_body) Some(html_body)
} }
/// Whether the given tag is a block HTML element.
#[cfg(feature = "markdown")]
fn is_block_tag(tag: &pulldown_cmark::Tag<'_>) -> bool {
use pulldown_cmark::Tag;
matches!(
tag,
Tag::Paragraph
| Tag::Heading { .. }
| Tag::BlockQuote(_)
| Tag::CodeBlock(_)
| Tag::HtmlBlock
| Tag::List(_)
| Tag::FootnoteDefinition(_)
| Tag::Table(_)
)
}
#[cfg(all(test, feature = "markdown"))] #[cfg(all(test, feature = "markdown"))]
mod tests { mod tests {
use assert_matches2::assert_matches;
use super::parse_markdown; use super::parse_markdown;
#[test] #[test]
fn detect_markdown() { fn detect_markdown() {
// Simple single-line text. // Simple single-line text.
let text = "Hello world."; let text = "Hello world.";
assert_matches!(parse_markdown(text), None); assert_eq!(parse_markdown(text), None);
// Simple double-line text. // Simple double-line text.
let text = "Hello\nworld."; let text = "Hello\nworld.";
assert_matches!(parse_markdown(text), None); assert_eq!(parse_markdown(text), None);
// With new paragraph. // With new paragraph.
let text = "Hello\n\nworld."; let text = "Hello\n\nworld.";
assert_matches!(parse_markdown(text), Some(_)); assert_eq!(parse_markdown(text).as_deref(), Some("<p>Hello</p>\n<p>world.</p>\n"));
// With heading and paragraph.
let text = "## Hello\n\nworld.";
assert_eq!(parse_markdown(text).as_deref(), Some("<h2>Hello</h2>\n<p>world.</p>\n"));
// With paragraph and code block.
let text = "Hello\n\n```\nworld.\n```";
assert_eq!(
parse_markdown(text).as_deref(),
Some("<p>Hello</p>\n<pre><code>world.\n</code></pre>\n")
);
// With tagged element. // With tagged element.
let text = "Hello **world**."; let text = "Hello **world**.";
assert_matches!(parse_markdown(text), Some(_)); assert_eq!(parse_markdown(text).as_deref(), Some("Hello <strong>world</strong>."));
// With backslash escapes. // With backslash escapes.
let text = r#"Hello \<world\>."#; let text = r#"Hello \<world\>."#;
assert_matches!(parse_markdown(text), Some(_)); assert_eq!(parse_markdown(text).as_deref(), Some("Hello &lt;world&gt;."));
// With entity reference. // With entity reference.
let text = r#"Hello &lt;world&gt;."#; let text = r#"Hello &lt;world&gt;."#;
assert_matches!(parse_markdown(text), Some(_)); assert_eq!(parse_markdown(text).as_deref(), Some("Hello &lt;world&gt;."));
// With numeric reference. // With numeric reference.
let text = "Hello w&#8853;rld."; let text = "Hello w&#8853;rld.";
assert_matches!(parse_markdown(text), Some(_)); assert_eq!(parse_markdown(text).as_deref(), Some("Hello w⊕rld."));
} }
} }

View File

@ -82,7 +82,7 @@ fn markdown_content_serialization() {
"org.matrix.msc1767.text": [ "org.matrix.msc1767.text": [
{ {
"mimetype": "text/html", "mimetype": "text/html",
"body": "<p>Testing <strong>bold</strong> and <em>italic</em>!</p>\n", "body": "Testing <strong>bold</strong> and <em>italic</em>!",
}, },
{ {
"body": "Testing **bold** and _italic_!", "body": "Testing **bold** and _italic_!",

View File

@ -117,7 +117,7 @@ fn text_msgtype_markdown_serialization() {
to_json_value(&formatted_message).unwrap(), to_json_value(&formatted_message).unwrap(),
json!({ json!({
"body": text, "body": text,
"formatted_body": "<p>Testing <strong>bold</strong> and <em>italic</em>!</p>\n", "formatted_body": "Testing <strong>bold</strong> and <em>italic</em>!",
"format": "org.matrix.custom.html", "format": "org.matrix.custom.html",
"msgtype": "m.text" "msgtype": "m.text"
}) })
@ -198,16 +198,20 @@ fn markdown_detection() {
assert_matches!(formatted_body, None); assert_matches!(formatted_body, None);
// Multiple paragraphs trigger markdown // Multiple paragraphs trigger markdown
let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs"); let formatted_body =
formatted_body.unwrap(); FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs").unwrap();
assert_eq!(
formatted_body.body,
"<p>A message<br />\nwith</p>\n<p>multiple</p>\n<p>paragraphs</p>\n"
);
// "Less than" symbol triggers markdown. // "Less than" symbol triggers markdown.
let formatted_body = FormattedBody::markdown("A message with & HTML < entities"); let formatted_body = FormattedBody::markdown("A message with & HTML < entities").unwrap();
assert_matches!(formatted_body, Some(_)); assert_eq!(formatted_body.body, "A message with &amp; HTML &lt; entities");
// HTML triggers markdown. // HTML triggers markdown.
let formatted_body = FormattedBody::markdown("<span>An HTML message</span>"); let formatted_body = FormattedBody::markdown("<span>An HTML message</span>").unwrap();
formatted_body.unwrap(); assert_eq!(formatted_body.body, "<span>An HTML message</span>");
} }
#[test] #[test]
@ -232,7 +236,7 @@ fn markdown_options() {
// Strikethrough // Strikethrough
let formatted_body = FormattedBody::markdown("A message with a ~~strike~~"); let formatted_body = FormattedBody::markdown("A message with a ~~strike~~");
assert_eq!(formatted_body.unwrap().body, "<p>A message with a <del>strike</del></p>\n"); assert_eq!(formatted_body.unwrap().body, "A message with a <del>strike</del>");
} }
#[test] #[test]