events: Improve markdown syntax detection
We also detect backslash escapes and entity references.
This commit is contained in:
parent
0ea496b138
commit
dac38e4e17
@ -7,6 +7,8 @@ Bug fixes:
|
|||||||
- Fix serialization of `room::message::Relation` and `room::encrypted::Relation`
|
- Fix serialization of `room::message::Relation` and `room::encrypted::Relation`
|
||||||
which could cause duplicate `rel_type` keys.
|
which could cause duplicate `rel_type` keys.
|
||||||
- `Restricted` no longer fails to deserialize when the `allow` field is missing
|
- `Restricted` no longer fails to deserialize when the `allow` field is missing
|
||||||
|
- Markdown text constructors now also detect markdown syntax like backslash
|
||||||
|
escapes and entity references to decide if the text should be sent as HTML.
|
||||||
|
|
||||||
Improvements:
|
Improvements:
|
||||||
|
|
||||||
|
@ -858,11 +858,12 @@ pub struct CustomEventContent {
|
|||||||
|
|
||||||
#[cfg(feature = "markdown")]
|
#[cfg(feature = "markdown")]
|
||||||
pub(crate) fn parse_markdown(text: &str) -> Option<String> {
|
pub(crate) fn parse_markdown(text: &str) -> Option<String> {
|
||||||
use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
|
use pulldown_cmark::{CowStr, Event, Options, Parser, Tag, TagEnd};
|
||||||
|
|
||||||
const OPTIONS: Options = Options::ENABLE_TABLES.union(Options::ENABLE_STRIKETHROUGH);
|
const OPTIONS: Options = Options::ENABLE_TABLES.union(Options::ENABLE_STRIKETHROUGH);
|
||||||
|
|
||||||
let mut found_first_paragraph = false;
|
let mut found_first_paragraph = false;
|
||||||
|
let mut previous_event_was_text = false;
|
||||||
|
|
||||||
let parser_events: Vec<_> = Parser::new_ext(text, OPTIONS)
|
let parser_events: Vec<_> = Parser::new_ext(text, OPTIONS)
|
||||||
.map(|event| match event {
|
.map(|event| match event {
|
||||||
@ -871,8 +872,29 @@ pub(crate) fn parse_markdown(text: &str) -> Option<String> {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
let has_markdown = parser_events.iter().any(|ref event| {
|
let has_markdown = parser_events.iter().any(|ref event| {
|
||||||
let is_text = matches!(event, Event::Text(_));
|
// Numeric references should be replaced by their UTF-8 equivalent, so encountering a
|
||||||
|
// non-borrowed string means that there is markdown syntax.
|
||||||
|
let is_borrowed_text = matches!(event, Event::Text(CowStr::Borrowed(_)));
|
||||||
|
|
||||||
|
if is_borrowed_text {
|
||||||
|
if previous_event_was_text {
|
||||||
|
// The text was split, so a character was likely removed, like in the case of
|
||||||
|
// backslash escapes, or replaced by a static string, like for entity references, so
|
||||||
|
// there is markdown syntax.
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
previous_event_was_text = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
previous_event_was_text = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A hard break happens when a newline is encountered, which is not necessarily markdown
|
||||||
|
// syntax.
|
||||||
let is_break = matches!(event, Event::HardBreak);
|
let is_break = matches!(event, Event::HardBreak);
|
||||||
|
|
||||||
|
// The parser always wraps the string into a paragraph, so the first paragraph should be
|
||||||
|
// ignored, it is not due to markdown syntax.
|
||||||
let is_first_paragraph_start = if matches!(event, Event::Start(Tag::Paragraph)) {
|
let is_first_paragraph_start = if matches!(event, Event::Start(Tag::Paragraph)) {
|
||||||
if found_first_paragraph {
|
if found_first_paragraph {
|
||||||
false
|
false
|
||||||
@ -885,7 +907,7 @@ pub(crate) fn parse_markdown(text: &str) -> Option<String> {
|
|||||||
};
|
};
|
||||||
let is_paragraph_end = matches!(event, Event::End(TagEnd::Paragraph));
|
let is_paragraph_end = matches!(event, Event::End(TagEnd::Paragraph));
|
||||||
|
|
||||||
!is_text && !is_break && !is_first_paragraph_start && !is_paragraph_end
|
!is_borrowed_text && !is_break && !is_first_paragraph_start && !is_paragraph_end
|
||||||
});
|
});
|
||||||
|
|
||||||
if !has_markdown {
|
if !has_markdown {
|
||||||
@ -897,3 +919,41 @@ pub(crate) fn parse_markdown(text: &str) -> Option<String> {
|
|||||||
|
|
||||||
Some(html_body)
|
Some(html_body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(all(test, feature = "markdown"))]
|
||||||
|
mod tests {
|
||||||
|
use assert_matches2::assert_matches;
|
||||||
|
|
||||||
|
use super::parse_markdown;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn detect_markdown() {
|
||||||
|
// Simple single-line text.
|
||||||
|
let text = "Hello world.";
|
||||||
|
assert_matches!(parse_markdown(text), None);
|
||||||
|
|
||||||
|
// Simple double-line text.
|
||||||
|
let text = "Hello\nworld.";
|
||||||
|
assert_matches!(parse_markdown(text), None);
|
||||||
|
|
||||||
|
// With new paragraph.
|
||||||
|
let text = "Hello\n\nworld.";
|
||||||
|
assert_matches!(parse_markdown(text), Some(_));
|
||||||
|
|
||||||
|
// With tagged element.
|
||||||
|
let text = "Hello **world**.";
|
||||||
|
assert_matches!(parse_markdown(text), Some(_));
|
||||||
|
|
||||||
|
// With backslash escapes.
|
||||||
|
let text = r#"Hello \<world\>."#;
|
||||||
|
assert_matches!(parse_markdown(text), Some(_));
|
||||||
|
|
||||||
|
// With entity reference.
|
||||||
|
let text = r#"Hello &lt;world&gt;."#;
|
||||||
|
assert_matches!(parse_markdown(text), Some(_));
|
||||||
|
|
||||||
|
// With numeric reference.
|
||||||
|
let text = "Hello w&#8853;rld.";
|
||||||
|
assert_matches!(parse_markdown(text), Some(_));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -201,9 +201,9 @@ fn markdown_detection() {
|
|||||||
let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs");
|
let formatted_body = FormattedBody::markdown("A message\nwith\n\nmultiple\n\nparagraphs");
|
||||||
formatted_body.unwrap();
|
formatted_body.unwrap();
|
||||||
|
|
||||||
// HTML entities don't trigger markdown.
|
// "Less than" symbol triggers markdown.
|
||||||
let formatted_body = FormattedBody::markdown("A message with & HTML < entities");
|
let formatted_body = FormattedBody::markdown("A message with & HTML < entities");
|
||||||
assert_matches!(formatted_body, None);
|
assert_matches!(formatted_body, Some(_));
|
||||||
|
|
||||||
// HTML triggers markdown.
|
// HTML triggers markdown.
|
||||||
let formatted_body = FormattedBody::markdown("<span>An HTML message</span>");
|
let formatted_body = FormattedBody::markdown("<span>An HTML message</span>");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user