html: Allow to navigate through the HTML tree
This commit is contained in:
parent
d36f485b19
commit
6e763ee5e7
@ -8,6 +8,8 @@ Breaking Changes:
|
||||
Improvements:
|
||||
|
||||
- Add support for deprecated HTML tags, according to Matrix 1.10
|
||||
- Allow navigating through the HTML tree with `Html::first_child()`,
|
||||
`Html::last_child()` or `Html::children()`
|
||||
|
||||
# 0.1.0
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::{collections::BTreeSet, fmt, io};
|
||||
use std::{collections::BTreeSet, fmt, io, iter::FusedIterator};
|
||||
|
||||
use as_variant::as_variant;
|
||||
use html5ever::{
|
||||
@ -122,6 +122,30 @@ impl Html {
|
||||
pub(crate) fn root(&self) -> &Node {
|
||||
&self.nodes[self.root_id()]
|
||||
}
|
||||
|
||||
/// Whether the root node of the HTML has children.
|
||||
pub fn has_children(&self) -> bool {
|
||||
self.root().first_child.is_some()
|
||||
}
|
||||
|
||||
/// The first child node of the root node of the HTML.
|
||||
///
|
||||
/// Returns `None` if the root node has no children.
|
||||
pub fn first_child(&self) -> Option<NodeRef<'_>> {
|
||||
self.root().first_child.map(|id| NodeRef::new(self, id))
|
||||
}
|
||||
|
||||
/// The last child node of the root node of the HTML .
|
||||
///
|
||||
/// Returns `None` if the root node has no children.
|
||||
pub fn last_child(&self) -> Option<NodeRef<'_>> {
|
||||
self.root().last_child.map(|id| NodeRef::new(self, id))
|
||||
}
|
||||
|
||||
/// Iterate through the children of the root node of the HTML.
|
||||
pub fn children(&self) -> Children<'_> {
|
||||
Children::new(self.first_child())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Html {
|
||||
@ -329,6 +353,11 @@ impl Node {
|
||||
as_variant!(&mut self.data, NodeData::Element)
|
||||
}
|
||||
|
||||
/// Returns the text content of this `Node`, if it is a `NodeData::Text`.
|
||||
fn as_text(&self) -> Option<&StrTendril> {
|
||||
as_variant!(&self.data, NodeData::Text)
|
||||
}
|
||||
|
||||
/// Returns the mutable text content of this `Node`, if it is a `NodeData::Text`.
|
||||
fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
|
||||
as_variant!(&mut self.data, NodeData::Text)
|
||||
@ -375,9 +404,9 @@ impl Node {
|
||||
}
|
||||
|
||||
/// The data of a `Node`.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(clippy::exhaustive_enums)]
|
||||
pub(crate) enum NodeData {
|
||||
pub enum NodeData {
|
||||
/// The root node of the `Html`.
|
||||
Document,
|
||||
|
||||
@ -392,7 +421,7 @@ pub(crate) enum NodeData {
|
||||
}
|
||||
|
||||
/// The data of an HTML element.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(clippy::exhaustive_structs)]
|
||||
pub struct ElementData {
|
||||
/// The qualified name of the element.
|
||||
@ -402,6 +431,123 @@ pub struct ElementData {
|
||||
pub attrs: BTreeSet<Attribute>,
|
||||
}
|
||||
|
||||
/// A reference to an HTML node.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[non_exhaustive]
|
||||
pub struct NodeRef<'a> {
|
||||
/// The `Html` struct containing the nodes.
|
||||
pub(crate) html: &'a Html,
|
||||
/// The referenced node.
|
||||
pub(crate) node: &'a Node,
|
||||
}
|
||||
|
||||
impl<'a> NodeRef<'a> {
|
||||
/// Construct a new `NodeRef` for the given HTML and node ID.
|
||||
fn new(html: &'a Html, id: usize) -> Self {
|
||||
Self { html, node: &html.nodes[id] }
|
||||
}
|
||||
|
||||
/// Construct a new `NodeRef` from the same HTML as this node with the given node ID.
|
||||
fn with_id(&self, id: usize) -> Self {
|
||||
let html = self.html;
|
||||
Self::new(html, id)
|
||||
}
|
||||
|
||||
/// The data of the node.
|
||||
pub fn data(&self) -> &'a NodeData {
|
||||
&self.node.data
|
||||
}
|
||||
|
||||
/// Returns the data of this node if it is a `NodeData::Element`.
|
||||
pub fn as_element(&self) -> Option<&'a ElementData> {
|
||||
self.node.as_element()
|
||||
}
|
||||
|
||||
/// Returns the text content of this node, if it is a `NodeData::Text`.
|
||||
pub fn as_text(&self) -> Option<&'a StrTendril> {
|
||||
self.node.as_text()
|
||||
}
|
||||
|
||||
/// The parent node of this node.
|
||||
///
|
||||
/// Returns `None` if the parent is the root node.
|
||||
pub fn parent(&self) -> Option<NodeRef<'a>> {
|
||||
let parent_id = self.node.parent?;
|
||||
|
||||
// We don't want users to be able to navigate to the root.
|
||||
if parent_id == self.html.root_id() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(self.with_id(parent_id))
|
||||
}
|
||||
|
||||
/// The next sibling node of this node.
|
||||
///
|
||||
/// Returns `None` if this is the last of its siblings.
|
||||
pub fn next_sibling(&self) -> Option<NodeRef<'a>> {
|
||||
Some(self.with_id(self.node.next_sibling?))
|
||||
}
|
||||
|
||||
/// The previous sibling node of this node.
|
||||
///
|
||||
/// Returns `None` if this is the first of its siblings.
|
||||
pub fn prev_sibling(&self) -> Option<NodeRef<'a>> {
|
||||
Some(self.with_id(self.node.prev_sibling?))
|
||||
}
|
||||
|
||||
/// Whether this node has children.
|
||||
pub fn has_children(&self) -> bool {
|
||||
self.node.first_child.is_some()
|
||||
}
|
||||
|
||||
/// The first child node of this node.
|
||||
///
|
||||
/// Returns `None` if this node has no children.
|
||||
pub fn first_child(&self) -> Option<NodeRef<'a>> {
|
||||
Some(self.with_id(self.node.first_child?))
|
||||
}
|
||||
|
||||
/// The last child node of this node.
|
||||
///
|
||||
/// Returns `None` if this node has no children.
|
||||
pub fn last_child(&self) -> Option<NodeRef<'a>> {
|
||||
Some(self.with_id(self.node.last_child?))
|
||||
}
|
||||
|
||||
/// Get an iterator through the children of this node.
|
||||
pub fn children(&self) -> Children<'a> {
|
||||
Children::new(self.first_child())
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator through the children of a node.
|
||||
///
|
||||
/// Can be constructed with [`Html::children()`] or [`NodeRef::children()`].
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Children<'a> {
|
||||
next: Option<NodeRef<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Children<'a> {
|
||||
/// Construct a `Children` starting from the given node.
|
||||
fn new(start_node: Option<NodeRef<'a>>) -> Self {
|
||||
Self { next: start_node }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Children<'a> {
|
||||
type Item = NodeRef<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let next = self.next?;
|
||||
self.next = next.next_sibling();
|
||||
Some(next)
|
||||
}
|
||||
}
|
||||
|
||||
// Once `next()` has returned `None` the pending node stays `None`, so it keeps returning `None`.
impl FusedIterator for Children<'_> {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::Html;
|
||||
|
@ -14,8 +14,4 @@ mod helpers;
|
||||
mod html;
|
||||
mod sanitizer_config;
|
||||
|
||||
pub use self::{
|
||||
helpers::*,
|
||||
html::{ElementData, Html},
|
||||
sanitizer_config::SanitizerConfig,
|
||||
};
|
||||
pub use self::{helpers::*, html::*, sanitizer_config::SanitizerConfig};
|
||||
|
@ -1 +1,2 @@
|
||||
mod navigate;
|
||||
mod sanitize;
|
||||
|
140
crates/ruma-html/tests/it/html/navigate.rs
Normal file
140
crates/ruma-html/tests/it/html/navigate.rs
Normal file
@ -0,0 +1,140 @@
|
||||
use ruma_html::Html;
|
||||
|
||||
/// Walks a small parsed document depth-first and checks every navigation accessor
/// (`parent`, siblings, children) on each node encountered.
#[test]
fn navigate_tree() {
    let raw_html = "\
        <h1>Title</h1>\
        <div class=\"text\">\
            <p>This is some <em>text</em></p>\
        </div>\
    ";
    let html = Html::parse(raw_html);

    // The document has two top-level nodes: `<h1>` and `<div>`.
    assert!(html.has_children());
    assert!(html.first_child().is_some());
    assert!(html.last_child().is_some());

    let mut top_level = html.children();

    // First top-level node: the `<h1>` element.
    let h1 = top_level.next().unwrap();

    let h1_data = h1.as_element().unwrap();
    assert_eq!(&h1_data.name.local, "h1");
    assert!(h1_data.attrs.is_empty());

    // Top-level nodes never expose the root as their parent.
    assert!(h1.parent().is_none());
    assert!(h1.next_sibling().is_some());
    assert!(h1.prev_sibling().is_none());
    assert!(h1.has_children());
    assert!(h1.first_child().is_some());
    assert!(h1.last_child().is_some());

    let mut h1_children = h1.children();

    // Only child of `<h1>`: its text.
    let h1_text_node = h1_children.next().unwrap();
    let h1_text = h1_text_node.as_text().unwrap();
    assert_eq!(h1_text.as_ref(), "Title");

    assert!(h1_text_node.parent().is_some());
    assert!(h1_text_node.next_sibling().is_none());
    assert!(h1_text_node.prev_sibling().is_none());
    assert!(!h1_text_node.has_children());
    assert!(h1_text_node.first_child().is_none());
    assert!(h1_text_node.last_child().is_none());

    // A text node yields no children.
    assert!(h1_text_node.children().next().is_none());

    assert!(h1_children.next().is_none());

    // Second top-level node: the `<div>` element with its `class` attribute.
    let div = top_level.next().unwrap();

    let div_data = div.as_element().unwrap();
    assert_eq!(&div_data.name.local, "div");
    assert_eq!(div_data.attrs.len(), 1);
    let class_attr = div_data.attrs.first().unwrap();
    assert_eq!(&class_attr.name.local, "class");
    assert_eq!(class_attr.value.as_ref(), "text");

    assert!(div.parent().is_none());
    assert!(div.next_sibling().is_none());
    assert!(div.prev_sibling().is_some());
    assert!(div.has_children());
    assert!(div.first_child().is_some());
    assert!(div.last_child().is_some());

    let mut div_children = div.children();

    // Only child of `<div>`: the `<p>` element.
    let p = div_children.next().unwrap();

    let p_data = p.as_element().unwrap();
    assert_eq!(&p_data.name.local, "p");
    assert!(p_data.attrs.is_empty());

    assert!(p.parent().is_some());
    assert!(p.next_sibling().is_none());
    assert!(p.prev_sibling().is_none());
    assert!(p.has_children());
    assert!(p.first_child().is_some());
    assert!(p.last_child().is_some());

    let mut p_children = p.children();

    // First child of `<p>`: the leading text.
    let p_text_node = p_children.next().unwrap();
    let p_text = p_text_node.as_text().unwrap();
    assert_eq!(p_text.as_ref(), "This is some ");

    assert!(p_text_node.parent().is_some());
    assert!(p_text_node.next_sibling().is_some());
    assert!(p_text_node.prev_sibling().is_none());
    assert!(!p_text_node.has_children());
    assert!(p_text_node.first_child().is_none());
    assert!(p_text_node.last_child().is_none());

    assert!(p_text_node.children().next().is_none());

    // Second child of `<p>`: the `<em>` element.
    let em = p_children.next().unwrap();

    let em_data = em.as_element().unwrap();
    assert_eq!(&em_data.name.local, "em");
    assert!(em_data.attrs.is_empty());

    assert!(em.parent().is_some());
    assert!(em.next_sibling().is_none());
    assert!(em.prev_sibling().is_some());
    assert!(em.has_children());
    assert!(em.first_child().is_some());
    assert!(em.last_child().is_some());

    let mut em_children = em.children();

    // Only child of `<em>`: its text.
    let em_text_node = em_children.next().unwrap();
    let em_text = em_text_node.as_text().unwrap();
    assert_eq!(em_text.as_ref(), "text");

    assert!(em_text_node.parent().is_some());
    assert!(em_text_node.next_sibling().is_none());
    assert!(em_text_node.prev_sibling().is_none());
    assert!(!em_text_node.has_children());
    assert!(em_text_node.first_child().is_none());
    assert!(em_text_node.last_child().is_none());

    assert!(em_text_node.children().next().is_none());

    // Every iterator is exhausted, from the innermost level outwards.
    assert!(em_children.next().is_none());
    assert!(p_children.next().is_none());
    assert!(div_children.next().is_none());
    assert!(top_level.next().is_none());
}
|
Loading…
x
Reference in New Issue
Block a user