events: Remove p tag around inline markdown

As recommended by the spec.
This commit is contained in:
Kévin Commaille 2024-10-24 20:26:27 +02:00 committed by strawberry
parent c3fb396352
commit 278a45aec8
4 changed files with 76 additions and 19 deletions

View File

@ -10,6 +10,8 @@ Bug fixes:
- `Restricted` no longer fails to deserialize when the `allow` field is missing
- Markdown text constructors now also detect markdown syntax like backslash
escapes and entity references to decide if the text should be sent as HTML.
- Markdown text with only inline syntax generates HTML that is not wrapped
inside a `<p>` element anymore, as recommended by the spec.
Improvements:

View File

@ -915,46 +915,97 @@ pub(crate) fn parse_markdown(text: &str) -> Option<String> {
return None;
}
// If the markdown contains only inline tags, i.e. if there is only one paragraph, remove the
// wrapping paragraph, as instructed by the spec.
let first_event_is_paragraph_start =
parser_events.first().is_some_and(|event| matches!(event, Event::Start(Tag::Paragraph)));
let last_event_is_paragraph_end =
parser_events.last().is_some_and(|event| matches!(event, Event::End(TagEnd::Paragraph)));
let mut has_several_blocks = true;
if first_event_is_paragraph_start && last_event_is_paragraph_end {
has_several_blocks = parser_events.iter().skip(1).any(|event| {
if let Event::Start(tag) = event {
is_block_tag(tag)
} else {
false
}
});
}
let mut events_iter = parser_events.into_iter();
if !has_several_blocks {
events_iter.next();
events_iter.next_back();
}
let mut html_body = String::new();
pulldown_cmark::html::push_html(&mut html_body, parser_events.into_iter());
pulldown_cmark::html::push_html(&mut html_body, events_iter);
Some(html_body)
}
/// Whether the given tag is a block HTML element.
#[cfg(feature = "markdown")]
fn is_block_tag(tag: &pulldown_cmark::Tag<'_>) -> bool {
use pulldown_cmark::Tag;
matches!(
tag,
Tag::Paragraph
| Tag::Heading { .. }
| Tag::BlockQuote(_)
| Tag::CodeBlock(_)
| Tag::HtmlBlock
| Tag::List(_)
| Tag::FootnoteDefinition(_)
| Tag::Table(_)
)
}
#[cfg(all(test, feature = "markdown"))]
mod tests {
use assert_matches2::assert_matches;
use super::parse_markdown;
#[test]
fn detect_markdown() {
// Simple single-line text.
let text = "Hello world.";
assert_matches!(parse_markdown(text), None);
assert_eq!(parse_markdown(text), None);
// Simple double-line text.
let text = "Hello\nworld.";
assert_matches!(parse_markdown(text), None);
assert_eq!(parse_markdown(text), None);
// With new paragraph.
let text = "Hello\n\nworld.";
assert_matches!(parse_markdown(text), Some(_));
assert_eq!(parse_markdown(text).as_deref(), Some("<p>Hello</p>\n<p>world.</p>\n"));
// With heading and paragraph.
let text = "## Hello\n\nworld.";
assert_eq!(parse_markdown(text).as_deref(), Some("<h2>Hello</h2>\n<p>world.</p>\n"));
// With paragraph and code block.
let text = "Hello\n\n```\nworld.\n```";
assert_eq!(
parse_markdown(text).as_deref(),
Some("<p>Hello</p>\n<pre><code>world.\n</code></pre>\n")
);
// With tagged element.
let text = "Hello **world**.";
assert_matches!(parse_markdown(text), Some(_));
assert_eq!(parse_markdown(text).as_deref(), Some("Hello <strong>world</strong>."));
// With backslash escapes.
let text = r#"Hello \<world\>."#;
assert_matches!(parse_markdown(text), Some(_));
assert_eq!(parse_markdown(text).as_deref(), Some("Hello &lt;world&gt;."));
// With entity reference.
let text = r#"Hello &lt;world&gt;."#;
assert_matches!(parse_markdown(text), Some(_));
assert_eq!(parse_markdown(text).as_deref(), Some("Hello &lt;world&gt;."));
// With numeric reference.
let text = "Hello w&#8853;rld.";
assert_matches!(parse_markdown(text), Some(_));
assert_eq!(parse_markdown(text).as_deref(), Some("Hello w⊕rld."));
}
}

View File

@ -82,7 +82,7 @@ fn markdown_content_serialization() {
"org.matrix.msc1767.text": [
{
"mimetype": "text/html",
"body": "<p>Testing <strong>bold</strong> and <em>italic</em>!</p>\n",
"body": "Testing <strong>bold</strong> and <em>italic</em>!",
},
{
"body": "Testing **bold** and _italic_!",

View File

@ -117,7 +117,7 @@ fn text_msgtype_markdown_serialization() {
to_json_value(&formatted_message).unwrap(),
json!({
"body": text,
"formatted_body": "<p>Testing <strong>bold</strong> and <em>italic</em>!</p>\n",
"formatted_body": "Testing <strong>bold</strong> and <em>italic</em>!",
"format": "org.matrix.custom.html",
"msgtype": "m.text"
})
@ -198,16 +198,20 @@ fn markdown_detection() {
assert_matches!(formatted_body, None);
// Multiple paragraphs trigger markdown
let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs");
formatted_body.unwrap();
let formatted_body =
FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs").unwrap();
assert_eq!(
formatted_body.body,
"<p>A message<br />\nwith</p>\n<p>multiple</p>\n<p>paragraphs</p>\n"
);
// "Less than" symbol triggers markdown.
let formatted_body = FormattedBody::markdown("A message with & HTML < entities");
assert_matches!(formatted_body, Some(_));
let formatted_body = FormattedBody::markdown("A message with & HTML < entities").unwrap();
assert_eq!(formatted_body.body, "A message with &amp; HTML &lt; entities");
// HTML triggers markdown.
let formatted_body = FormattedBody::markdown("<span>An HTML message</span>");
formatted_body.unwrap();
let formatted_body = FormattedBody::markdown("<span>An HTML message</span>").unwrap();
assert_eq!(formatted_body.body, "<span>An HTML message</span>");
}
#[test]
@ -232,7 +236,7 @@ fn markdown_options() {
// Strikethrough
let formatted_body = FormattedBody::markdown("A message with a ~~strike~~");
assert_eq!(formatted_body.unwrap().body, "<p>A message with a <del>strike</del></p>\n");
assert_eq!(formatted_body.unwrap().body, "A message with a <del>strike</del>");
}
#[test]