html: Allow to navigate through the HTML tree
This commit is contained in:
parent
d36f485b19
commit
6e763ee5e7
@ -8,6 +8,8 @@ Breaking Changes:
|
|||||||
Improvements:
|
Improvements:
|
||||||
|
|
||||||
- Add support for deprecated HTML tags, according to Matrix 1.10
|
- Add support for deprecated HTML tags, according to Matrix 1.10
|
||||||
|
- Allow navigating through the HTML tree with `Html::first_child()`,
|
||||||
|
`Html::last_child()` or `Html::children()`
|
||||||
|
|
||||||
# 0.1.0
|
# 0.1.0
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::{collections::BTreeSet, fmt, io};
|
use std::{collections::BTreeSet, fmt, io, iter::FusedIterator};
|
||||||
|
|
||||||
use as_variant::as_variant;
|
use as_variant::as_variant;
|
||||||
use html5ever::{
|
use html5ever::{
|
||||||
@ -122,6 +122,30 @@ impl Html {
|
|||||||
pub(crate) fn root(&self) -> &Node {
|
pub(crate) fn root(&self) -> &Node {
|
||||||
&self.nodes[self.root_id()]
|
&self.nodes[self.root_id()]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the root node of the HTML has children.
|
||||||
|
pub fn has_children(&self) -> bool {
|
||||||
|
self.root().first_child.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The first child node of the root node of the HTML.
|
||||||
|
///
|
||||||
|
/// Returns `None` if the root node has no children.
|
||||||
|
pub fn first_child(&self) -> Option<NodeRef<'_>> {
|
||||||
|
self.root().first_child.map(|id| NodeRef::new(self, id))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The last child node of the root node of the HTML.
|
||||||
|
///
|
||||||
|
/// Returns `None` if the root node has no children.
|
||||||
|
pub fn last_child(&self) -> Option<NodeRef<'_>> {
|
||||||
|
self.root().last_child.map(|id| NodeRef::new(self, id))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate through the children of the root node of the HTML.
|
||||||
|
pub fn children(&self) -> Children<'_> {
|
||||||
|
Children::new(self.first_child())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Html {
|
impl Default for Html {
|
||||||
@ -329,6 +353,11 @@ impl Node {
|
|||||||
as_variant!(&mut self.data, NodeData::Element)
|
as_variant!(&mut self.data, NodeData::Element)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the text content of this `Node`, if it is a `NodeData::Text`.
|
||||||
|
fn as_text(&self) -> Option<&StrTendril> {
|
||||||
|
as_variant!(&self.data, NodeData::Text)
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the mutable text content of this `Node`, if it is a `NodeData::Text`.
|
/// Returns the mutable text content of this `Node`, if it is a `NodeData::Text`.
|
||||||
fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
|
fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
|
||||||
as_variant!(&mut self.data, NodeData::Text)
|
as_variant!(&mut self.data, NodeData::Text)
|
||||||
@ -375,9 +404,9 @@ impl Node {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The data of a `Node`.
|
/// The data of a `Node`.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(clippy::exhaustive_enums)]
|
#[allow(clippy::exhaustive_enums)]
|
||||||
pub(crate) enum NodeData {
|
pub enum NodeData {
|
||||||
/// The root node of the `Html`.
|
/// The root node of the `Html`.
|
||||||
Document,
|
Document,
|
||||||
|
|
||||||
@ -392,7 +421,7 @@ pub(crate) enum NodeData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The data of an HTML element.
|
/// The data of an HTML element.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(clippy::exhaustive_structs)]
|
#[allow(clippy::exhaustive_structs)]
|
||||||
pub struct ElementData {
|
pub struct ElementData {
|
||||||
/// The qualified name of the element.
|
/// The qualified name of the element.
|
||||||
@ -402,6 +431,123 @@ pub struct ElementData {
|
|||||||
pub attrs: BTreeSet<Attribute>,
|
pub attrs: BTreeSet<Attribute>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A reference to an HTML node.
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct NodeRef<'a> {
|
||||||
|
/// The `Html` struct containing the nodes.
|
||||||
|
pub(crate) html: &'a Html,
|
||||||
|
/// The referenced node.
|
||||||
|
pub(crate) node: &'a Node,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> NodeRef<'a> {
|
||||||
|
/// Construct a new `NodeRef` for the given HTML and node ID.
|
||||||
|
fn new(html: &'a Html, id: usize) -> Self {
|
||||||
|
Self { html, node: &html.nodes[id] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a new `NodeRef` from the same HTML as this node with the given node ID.
|
||||||
|
fn with_id(&self, id: usize) -> Self {
|
||||||
|
let html = self.html;
|
||||||
|
Self::new(html, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The data of the node.
|
||||||
|
pub fn data(&self) -> &'a NodeData {
|
||||||
|
&self.node.data
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the data of this node if it is a `NodeData::Element`.
|
||||||
|
pub fn as_element(&self) -> Option<&'a ElementData> {
|
||||||
|
self.node.as_element()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the text content of this node, if it is a `NodeData::Text`.
|
||||||
|
pub fn as_text(&self) -> Option<&'a StrTendril> {
|
||||||
|
self.node.as_text()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The parent node of this node.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this node has no parent or if its parent is the root node.
|
||||||
|
pub fn parent(&self) -> Option<NodeRef<'a>> {
|
||||||
|
let parent_id = self.node.parent?;
|
||||||
|
|
||||||
|
// We don't want users to be able to navigate to the root.
|
||||||
|
if parent_id == self.html.root_id() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(self.with_id(parent_id))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The next sibling node of this node.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this is the last of its siblings.
|
||||||
|
pub fn next_sibling(&self) -> Option<NodeRef<'a>> {
|
||||||
|
Some(self.with_id(self.node.next_sibling?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The previous sibling node of this node.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this is the first of its siblings.
|
||||||
|
pub fn prev_sibling(&self) -> Option<NodeRef<'a>> {
|
||||||
|
Some(self.with_id(self.node.prev_sibling?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether this node has children.
|
||||||
|
pub fn has_children(&self) -> bool {
|
||||||
|
self.node.first_child.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The first child node of this node.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this node has no children.
|
||||||
|
pub fn first_child(&self) -> Option<NodeRef<'a>> {
|
||||||
|
Some(self.with_id(self.node.first_child?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The last child node of this node.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this node has no children.
|
||||||
|
pub fn last_child(&self) -> Option<NodeRef<'a>> {
|
||||||
|
Some(self.with_id(self.node.last_child?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get an iterator through the children of this node.
|
||||||
|
pub fn children(&self) -> Children<'a> {
|
||||||
|
Children::new(self.first_child())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator through the children of a node.
|
||||||
|
///
|
||||||
|
/// Can be constructed with [`Html::children()`] or [`NodeRef::children()`].
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct Children<'a> {
|
||||||
|
next: Option<NodeRef<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Children<'a> {
|
||||||
|
/// Construct a `Children` starting from the given node.
|
||||||
|
fn new(start_node: Option<NodeRef<'a>>) -> Self {
|
||||||
|
Self { next: start_node }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for Children<'a> {
|
||||||
|
type Item = NodeRef<'a>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let next = self.next?;
|
||||||
|
self.next = next.next_sibling();
|
||||||
|
Some(next)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FusedIterator for Children<'a> {}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::Html;
|
use super::Html;
|
||||||
|
@ -14,8 +14,4 @@ mod helpers;
|
|||||||
mod html;
|
mod html;
|
||||||
mod sanitizer_config;
|
mod sanitizer_config;
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{helpers::*, html::*, sanitizer_config::SanitizerConfig};
|
||||||
helpers::*,
|
|
||||||
html::{ElementData, Html},
|
|
||||||
sanitizer_config::SanitizerConfig,
|
|
||||||
};
|
|
||||||
|
@ -1 +1,2 @@
|
|||||||
|
mod navigate;
|
||||||
mod sanitize;
|
mod sanitize;
|
||||||
|
140
crates/ruma-html/tests/it/html/navigate.rs
Normal file
140
crates/ruma-html/tests/it/html/navigate.rs
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
use ruma_html::Html;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn navigate_tree() {
|
||||||
|
let raw_html = "\
|
||||||
|
<h1>Title</h1>\
|
||||||
|
<div class=\"text\">\
|
||||||
|
<p>This is some <em>text</em></p>\
|
||||||
|
</div>\
|
||||||
|
";
|
||||||
|
let html = Html::parse(raw_html);
|
||||||
|
|
||||||
|
assert!(html.has_children());
|
||||||
|
assert!(html.first_child().is_some());
|
||||||
|
assert!(html.last_child().is_some());
|
||||||
|
|
||||||
|
let mut html_children = html.children();
|
||||||
|
|
||||||
|
// `<h1>` element.
|
||||||
|
let h1_node = html_children.next().unwrap();
|
||||||
|
|
||||||
|
let h1_element = h1_node.as_element().unwrap();
|
||||||
|
assert_eq!(&h1_element.name.local, "h1");
|
||||||
|
assert!(h1_element.attrs.is_empty());
|
||||||
|
|
||||||
|
assert!(h1_node.parent().is_none());
|
||||||
|
assert!(h1_node.next_sibling().is_some());
|
||||||
|
assert!(h1_node.prev_sibling().is_none());
|
||||||
|
assert!(h1_node.has_children());
|
||||||
|
assert!(h1_node.first_child().is_some());
|
||||||
|
assert!(h1_node.last_child().is_some());
|
||||||
|
|
||||||
|
let mut h1_children = h1_node.children();
|
||||||
|
|
||||||
|
// Text of `<h1>` element.
|
||||||
|
let h1_text_node = h1_children.next().unwrap();
|
||||||
|
let h1_text = h1_text_node.as_text().unwrap();
|
||||||
|
assert_eq!(h1_text.as_ref(), "Title");
|
||||||
|
|
||||||
|
assert!(h1_text_node.parent().is_some());
|
||||||
|
assert!(h1_text_node.next_sibling().is_none());
|
||||||
|
assert!(h1_text_node.prev_sibling().is_none());
|
||||||
|
assert!(!h1_text_node.has_children());
|
||||||
|
assert!(h1_text_node.first_child().is_none());
|
||||||
|
assert!(h1_text_node.last_child().is_none());
|
||||||
|
|
||||||
|
let mut h1_text_children = h1_text_node.children();
|
||||||
|
assert!(h1_text_children.next().is_none());
|
||||||
|
|
||||||
|
assert!(h1_children.next().is_none());
|
||||||
|
|
||||||
|
// `<div>` element.
|
||||||
|
let div_node = html_children.next().unwrap();
|
||||||
|
|
||||||
|
let div_element = div_node.as_element().unwrap();
|
||||||
|
assert_eq!(&div_element.name.local, "div");
|
||||||
|
assert_eq!(div_element.attrs.len(), 1);
|
||||||
|
let class_attr = div_element.attrs.first().unwrap();
|
||||||
|
assert_eq!(&class_attr.name.local, "class");
|
||||||
|
assert_eq!(class_attr.value.as_ref(), "text");
|
||||||
|
|
||||||
|
assert!(div_node.parent().is_none());
|
||||||
|
assert!(div_node.next_sibling().is_none());
|
||||||
|
assert!(div_node.prev_sibling().is_some());
|
||||||
|
assert!(div_node.has_children());
|
||||||
|
assert!(div_node.first_child().is_some());
|
||||||
|
assert!(div_node.last_child().is_some());
|
||||||
|
|
||||||
|
let mut div_children = div_node.children();
|
||||||
|
|
||||||
|
// `<p>` element.
|
||||||
|
let p_node = div_children.next().unwrap();
|
||||||
|
|
||||||
|
let p_element = p_node.as_element().unwrap();
|
||||||
|
assert_eq!(&p_element.name.local, "p");
|
||||||
|
assert!(p_element.attrs.is_empty());
|
||||||
|
|
||||||
|
assert!(p_node.parent().is_some());
|
||||||
|
assert!(p_node.next_sibling().is_none());
|
||||||
|
assert!(p_node.prev_sibling().is_none());
|
||||||
|
assert!(p_node.has_children());
|
||||||
|
assert!(p_node.first_child().is_some());
|
||||||
|
assert!(p_node.last_child().is_some());
|
||||||
|
|
||||||
|
let mut p_children = p_node.children();
|
||||||
|
|
||||||
|
// Text of `<p>` element.
|
||||||
|
let p_text_node = p_children.next().unwrap();
|
||||||
|
let p_text = p_text_node.as_text().unwrap();
|
||||||
|
assert_eq!(p_text.as_ref(), "This is some ");
|
||||||
|
|
||||||
|
assert!(p_text_node.parent().is_some());
|
||||||
|
assert!(p_text_node.next_sibling().is_some());
|
||||||
|
assert!(p_text_node.prev_sibling().is_none());
|
||||||
|
assert!(!p_text_node.has_children());
|
||||||
|
assert!(p_text_node.first_child().is_none());
|
||||||
|
assert!(p_text_node.last_child().is_none());
|
||||||
|
|
||||||
|
let mut p_text_children = p_text_node.children();
|
||||||
|
assert!(p_text_children.next().is_none());
|
||||||
|
|
||||||
|
// `<em>` element.
|
||||||
|
let em_node = p_children.next().unwrap();
|
||||||
|
|
||||||
|
let em_element = em_node.as_element().unwrap();
|
||||||
|
assert_eq!(&em_element.name.local, "em");
|
||||||
|
assert!(em_element.attrs.is_empty());
|
||||||
|
|
||||||
|
assert!(em_node.parent().is_some());
|
||||||
|
assert!(em_node.next_sibling().is_none());
|
||||||
|
assert!(em_node.prev_sibling().is_some());
|
||||||
|
assert!(em_node.has_children());
|
||||||
|
assert!(em_node.first_child().is_some());
|
||||||
|
assert!(em_node.last_child().is_some());
|
||||||
|
|
||||||
|
let mut em_children = em_node.children();
|
||||||
|
|
||||||
|
// Text of `<em>` element.
|
||||||
|
let em_text_node = em_children.next().unwrap();
|
||||||
|
let em_text = em_text_node.as_text().unwrap();
|
||||||
|
assert_eq!(em_text.as_ref(), "text");
|
||||||
|
|
||||||
|
assert!(em_text_node.parent().is_some());
|
||||||
|
assert!(em_text_node.next_sibling().is_none());
|
||||||
|
assert!(em_text_node.prev_sibling().is_none());
|
||||||
|
assert!(!em_text_node.has_children());
|
||||||
|
assert!(em_text_node.first_child().is_none());
|
||||||
|
assert!(em_text_node.last_child().is_none());
|
||||||
|
|
||||||
|
let mut em_text_children = em_text_node.children();
|
||||||
|
assert!(em_text_children.next().is_none());
|
||||||
|
|
||||||
|
assert!(em_children.next().is_none());
|
||||||
|
|
||||||
|
assert!(p_children.next().is_none());
|
||||||
|
|
||||||
|
assert!(div_children.next().is_none());
|
||||||
|
|
||||||
|
assert!(html_children.next().is_none());
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user