events: Fix markdown content detection

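Plain text containing characters that are encoded as HTML entities (e.g. `&` or `<`)
was wrongly detected as Markdown, because the rendered HTML differed from the raw body.
Use the parsed events instead to check whether Markdown syntax was detected.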
This commit is contained in:
Kévin Commaille 2022-10-19 17:21:07 +02:00 committed by Kévin Commaille
parent 7ab6e3ed02
commit a710229d9d
3 changed files with 65 additions and 11 deletions

View File

@ -238,12 +238,9 @@ impl Text {
/// Returns `None` if no Markdown formatting was found.
#[cfg(feature = "markdown")]
pub fn markdown(body: impl AsRef<str>) -> Option<Self> {
// NOTE(review): this hunk appears to show removed and added lines interleaved without
// `+`/`-` markers; the comments below say which implementation each line seems to belong
// to. Confirm against the actual diff.
//
// Presumably removed: the old implementation always rendered the body to HTML first.
let body = body.as_ref();
let mut html_body = String::new();
// Presumably added: the new implementation delegates to the shared `parse_markdown` helper.
use super::room::message::parse_markdown;
// Presumably removed: unconditional rendering of the body into `html_body`.
pulldown_cmark::html::push_html(&mut html_body, pulldown_cmark::Parser::new(body));
// Presumably removed: any difference from `<p>{body}</p>\n` counted as Markdown. Per the
// commit message, encoded HTML entities were also detected as a change by this comparison.
(html_body != format!("<p>{}</p>\n", body)).then(|| Self::html(html_body))
// Presumably added: `None` when the helper returns `None`, otherwise the returned HTML is
// passed to `Self::html`.
parse_markdown(body.as_ref()).map(Self::html)
}
fn default_mimetype() -> String {

View File

@ -592,12 +592,7 @@ impl FormattedBody {
/// Returns `None` if no Markdown formatting was found.
#[cfg(feature = "markdown")]
pub fn markdown(body: impl AsRef<str>) -> Option<Self> {
// NOTE(review): this hunk appears to show removed and added lines interleaved without
// `+`/`-` markers; the comments below say which implementation each line seems to belong
// to. Confirm against the actual diff.
//
// Presumably removed: the old implementation rendered the body to HTML unconditionally...
let body = body.as_ref();
let mut html_body = String::new();
pulldown_cmark::html::push_html(&mut html_body, pulldown_cmark::Parser::new(body));
// ...and counted any difference from `<p>{body}</p>\n` as Markdown. Per the commit
// message, encoded HTML entities were also detected as a change by this comparison.
(html_body != format!("<p>{}</p>\n", body)).then(|| Self::html(html_body))
// Presumably added: delegate to `parse_markdown`; `None` is passed through, otherwise the
// returned HTML is passed to `Self::html`.
parse_markdown(body.as_ref()).map(Self::html)
}
/// Sanitize this `FormattedBody` if its format is `MessageFormat::Html`.
@ -636,3 +631,43 @@ pub struct CustomEventContent {
#[serde(flatten)]
data: JsonObject,
}
/// Renders `text` to HTML if, and only if, it contains Markdown formatting.
///
/// Detection is based on the events emitted by the `pulldown_cmark` parser rather than on a
/// comparison of the rendered output with the input, so characters that merely get encoded as
/// HTML entities do not count as formatting.
///
/// The text is considered plain when the parser emits nothing but text, soft or hard breaks,
/// and the start and end of a single paragraph. Any other event, including the start of a
/// second paragraph, is treated as Markdown.
///
/// Returns `None` if no Markdown formatting was found, otherwise the rendered HTML.
#[cfg(feature = "markdown")]
pub(crate) fn parse_markdown(text: &str) -> Option<String> {
    use pulldown_cmark::{Event, Parser, Tag};

    // Parse once and reuse the events for both detection and rendering, instead of running
    // the parser a second time when Markdown is found.
    let events: Vec<_> = Parser::new(text).collect();

    let mut found_first_paragraph = false;
    let has_markdown = events.iter().any(|event| match event {
        // Events that also occur in a plain, single-paragraph message.
        Event::Text(_) | Event::SoftBreak | Event::HardBreak | Event::End(Tag::Paragraph) => false,
        // Only the first paragraph start is expected in plain text: `replace` yields the
        // previous flag value, i.e. `false` the first time and `true` for every later paragraph.
        Event::Start(Tag::Paragraph) => std::mem::replace(&mut found_first_paragraph, true),
        // Everything else is Markdown syntax.
        _ => true,
    });

    if !has_markdown {
        return None;
    }

    let mut html_body = String::new();
    pulldown_cmark::html::push_html(&mut html_body, events.into_iter());

    Some(html_body)
}

View File

@ -302,6 +302,28 @@ fn markdown_content_serialization() {
);
}
#[test]
#[cfg(feature = "markdown")]
fn markdown_detection() {
    use ruma_common::events::room::message::FormattedBody;

    // Plain text without any formatting yields no formatted body.
    assert_matches!(FormattedBody::markdown("A simple message."), None);

    // More than one paragraph counts as Markdown.
    FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs").unwrap();

    // Characters that are encoded as HTML entities do not count as Markdown.
    assert_matches!(FormattedBody::markdown("A message with & HTML < entities"), None);

    // Raw HTML counts as Markdown.
    FormattedBody::markdown("<span>An HTML message</span>").unwrap();
}
#[test]
fn verification_request_deserialization() {
let user_id = user_id!("@example2:localhost");