xtask: Disallow linking to the latest spec when a link uses a non-unique ID
These IDs can change from one version to the other if another heading with the same name is added before, so the link would not point to the proper part of the spec anymore.
This commit is contained in:
parent
5e6fe83d38
commit
e653912e22
@ -9,8 +9,8 @@ pub mod v3 {
|
|||||||
//! [by their Matrix identifier][spec-mxid], and one to invite a user
|
//! [by their Matrix identifier][spec-mxid], and one to invite a user
|
||||||
//! [by their third party identifier][spec-3pid].
|
//! [by their third party identifier][spec-3pid].
|
||||||
//!
|
//!
|
||||||
//! [spec-mxid]: https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3roomsroomidinvite
|
//! [spec-mxid]: https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3roomsroomidinvite
|
||||||
//! [spec-3pid]: https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3roomsroomidinvite-1
|
//! [spec-3pid]: https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3roomsroomidinvite-1
|
||||||
|
|
||||||
use ruma_common::{
|
use ruma_common::{
|
||||||
api::{request, response, Metadata},
|
api::{request, response, Metadata},
|
||||||
|
@ -590,7 +590,7 @@ mod tests {
|
|||||||
assert!(!"m".matches_word("[[:alpha:]]?"));
|
assert!(!"m".matches_word("[[:alpha:]]?"));
|
||||||
assert!("[[:alpha:]]!".matches_word("[[:alpha:]]?"));
|
assert!("[[:alpha:]]!".matches_word("[[:alpha:]]?"));
|
||||||
|
|
||||||
// From the spec: <https://spec.matrix.org/latest/client-server-api/#conditions-1>
|
// From the spec: <https://spec.matrix.org/v1.5/client-server-api/#conditions-1>
|
||||||
assert!("An example event.".matches_word("ex*ple"));
|
assert!("An example event.".matches_word("ex*ple"));
|
||||||
assert!("exple".matches_word("ex*ple"));
|
assert!("exple".matches_word("ex*ple"));
|
||||||
assert!("An exciting triple-whammy".matches_word("ex*ple"));
|
assert!("An exciting triple-whammy".matches_word("ex*ple"));
|
||||||
@ -639,7 +639,7 @@ mod tests {
|
|||||||
assert!("".matches_pattern("*", false));
|
assert!("".matches_pattern("*", false));
|
||||||
assert!(!"foo".matches_pattern("", false));
|
assert!(!"foo".matches_pattern("", false));
|
||||||
|
|
||||||
// From the spec: <https://spec.matrix.org/latest/client-server-api/#conditions-1>
|
// From the spec: <https://spec.matrix.org/v1.5/client-server-api/#conditions-1>
|
||||||
assert!("Lunch plans".matches_pattern("lunc?*", false));
|
assert!("Lunch plans".matches_pattern("lunc?*", false));
|
||||||
assert!("LUNCH".matches_pattern("lunc?*", false));
|
assert!("LUNCH".matches_pattern("lunc?*", false));
|
||||||
assert!(!" lunch".matches_pattern("lunc?*", false));
|
assert!(!" lunch".matches_pattern("lunc?*", false));
|
||||||
|
@ -77,7 +77,7 @@ pub enum MxcUriError {
|
|||||||
/// Media identifier malformed due to invalid characters detected.
|
/// Media identifier malformed due to invalid characters detected.
|
||||||
///
|
///
|
||||||
/// Valid characters are (in regex notation) `[A-Za-z0-9_-]+`.
|
/// Valid characters are (in regex notation) `[A-Za-z0-9_-]+`.
|
||||||
/// See [here](https://spec.matrix.org/latest/client-server-api/#security-considerations-5) for more details.
|
/// See [here](https://spec.matrix.org/v1.5/client-server-api/#security-considerations-5) for more details.
|
||||||
#[error("Media Identifier malformed, invalid characters")]
|
#[error("Media Identifier malformed, invalid characters")]
|
||||||
MediaIdMalformed,
|
MediaIdMalformed,
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ pub fn validate(uri: &str) -> Result<NonZeroU8, MxcUriError> {
|
|||||||
|
|
||||||
let server_name = &uri[..index];
|
let server_name = &uri[..index];
|
||||||
let media_id = &uri[index + 1..];
|
let media_id = &uri[index + 1..];
|
||||||
// See: https://spec.matrix.org/latest/client-server-api/#security-considerations-5
|
// See: https://spec.matrix.org/v1.5/client-server-api/#security-considerations-5
|
||||||
let media_id_is_valid =
|
let media_id_is_valid =
|
||||||
media_id.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'-' ));
|
media_id.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'-' ));
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#![allow(clippy::disallowed_types)]
|
#![allow(clippy::disallowed_types)]
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, HashSet},
|
collections::HashMap,
|
||||||
fs::{self, File},
|
fs::{self, File},
|
||||||
io::{BufRead, BufReader},
|
io::{BufRead, BufReader},
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
@ -72,6 +72,15 @@ impl SpecLink {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether an ID has duplicates.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum HasDuplicates {
|
||||||
|
/// The ID has duplicates.
|
||||||
|
Yes,
|
||||||
|
/// The ID doesn't have duplicates.
|
||||||
|
No,
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn check_spec_links(path: &Path) -> Result<()> {
|
pub(crate) fn check_spec_links(path: &Path) -> Result<()> {
|
||||||
println!("Checking Matrix spec links are up-to-date...");
|
println!("Checking Matrix spec links are up-to-date...");
|
||||||
let links = collect_links(path)?;
|
let links = collect_links(path)?;
|
||||||
@ -173,7 +182,7 @@ fn check_whitelist(links: &[SpecLink]) -> Result<()> {
|
|||||||
/// Check if the URL points to a valid page and a valid fragment.
|
/// Check if the URL points to a valid page and a valid fragment.
|
||||||
fn check_targets(links: &[SpecLink]) -> Result<()> {
|
fn check_targets(links: &[SpecLink]) -> Result<()> {
|
||||||
// Map page URL => IDs in the page.
|
// Map page URL => IDs in the page.
|
||||||
let mut page_cache: HashMap<String, Result<HashSet<String>>> = HashMap::new();
|
let mut page_cache: HashMap<String, Result<HashMap<String, HasDuplicates>>> = HashMap::new();
|
||||||
let mut err_nb: u16 = 0;
|
let mut err_nb: u16 = 0;
|
||||||
|
|
||||||
for link in links {
|
for link in links {
|
||||||
@ -183,7 +192,16 @@ fn check_targets(links: &[SpecLink]) -> Result<()> {
|
|||||||
match page_cache.entry(url.to_owned()).or_insert_with(|| get_page_ids(url)) {
|
match page_cache.entry(url.to_owned()).or_insert_with(|| get_page_ids(url)) {
|
||||||
Ok(ids) => {
|
Ok(ids) => {
|
||||||
if let Some(fragment) = fragment {
|
if let Some(fragment) = fragment {
|
||||||
if !ids.contains(fragment) {
|
if let Some(has_duplicates) = ids.get(fragment) {
|
||||||
|
// Don't allow links to the latest spec with duplicate IDs, they might point
|
||||||
|
// to another part of the spec in a new version.
|
||||||
|
if *has_duplicates == HasDuplicates::Yes
|
||||||
|
&& link.url[link.min_len()..].starts_with("latest/")
|
||||||
|
{
|
||||||
|
err_nb += 1;
|
||||||
|
print_link_err("Spec link to latest version with non-unique ID", link);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
err_nb += 1;
|
err_nb += 1;
|
||||||
print_link_err("Spec link with wrong fragment", link);
|
print_link_err("Spec link with wrong fragment", link);
|
||||||
}
|
}
|
||||||
@ -208,24 +226,24 @@ fn check_targets(links: &[SpecLink]) -> Result<()> {
|
|||||||
/// Get the IDs in the given webpage.
|
/// Get the IDs in the given webpage.
|
||||||
///
|
///
|
||||||
/// Returns an error if the URL points to an invalid HTML page.
|
/// Returns an error if the URL points to an invalid HTML page.
|
||||||
fn get_page_ids(url: &str) -> Result<HashSet<String>> {
|
fn get_page_ids(url: &str) -> Result<HashMap<String, HasDuplicates>> {
|
||||||
let mut page = isahc::get(url)?;
|
let mut page = isahc::get(url)?;
|
||||||
|
|
||||||
let html = page.text()?;
|
let html = page.text()?;
|
||||||
let mut ids = HashSet::new();
|
let mut ids = HashMap::new();
|
||||||
|
|
||||||
for token in Tokenizer::new(&html).infallible() {
|
for token in Tokenizer::new(&html).infallible() {
|
||||||
let Token::StartTag(tag) = token else {
|
let Token::StartTag(tag) = token else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
let Some(mut id) = tag.attributes.get(b"id".as_slice())
|
let Some(id) = tag.attributes.get(b"id".as_slice())
|
||||||
.and_then(|s| String::from_utf8(s.0.clone()).ok()) else {
|
.and_then(|s| String::from_utf8(s.0.clone()).ok()) else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
id = uniquify_heading_id(id, &ids);
|
let (id, has_duplicates) = uniquify_heading_id(id, &mut ids);
|
||||||
ids.insert(id);
|
ids.insert(id, has_duplicates);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(ids)
|
Ok(ids)
|
||||||
@ -239,16 +257,22 @@ fn get_page_ids(url: &str) -> Result<HashSet<String>> {
|
|||||||
/// This is a reimplementation of the algorithm used for the spec.
|
/// This is a reimplementation of the algorithm used for the spec.
|
||||||
///
|
///
|
||||||
/// See <https://github.com/matrix-org/matrix-spec/blob/6b02e393082570db2d0a651ddb79a365bc4a0f8d/static/js/toc.js#L25-L37>.
|
/// See <https://github.com/matrix-org/matrix-spec/blob/6b02e393082570db2d0a651ddb79a365bc4a0f8d/static/js/toc.js#L25-L37>.
|
||||||
fn uniquify_heading_id(mut id: String, unique_ids: &HashSet<String>) -> String {
|
fn uniquify_heading_id(
|
||||||
|
mut id: String,
|
||||||
|
unique_ids: &mut HashMap<String, HasDuplicates>,
|
||||||
|
) -> (String, HasDuplicates) {
|
||||||
let base_id = id.clone();
|
let base_id = id.clone();
|
||||||
let mut counter: u16 = 0;
|
let mut counter: u16 = 0;
|
||||||
|
let mut has_duplicates = HasDuplicates::No;
|
||||||
|
|
||||||
while unique_ids.contains(&id) {
|
while let Some(other_id_has_dup) = unique_ids.get_mut(&id) {
|
||||||
|
has_duplicates = HasDuplicates::Yes;
|
||||||
|
*other_id_has_dup = HasDuplicates::Yes;
|
||||||
counter += 1;
|
counter += 1;
|
||||||
id = format!("{base_id}-{counter}");
|
id = format!("{base_id}-{counter}");
|
||||||
}
|
}
|
||||||
|
|
||||||
id
|
(id, has_duplicates)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_link_err(error: &str, link: &SpecLink) {
|
fn print_link_err(error: &str, link: &SpecLink) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user