common: Use the same percent encoding set consistently

This commit is contained in:
Kévin Commaille 2023-01-24 17:35:17 +01:00 committed by Kévin Commaille
parent 3d74117dd4
commit 06820cdc92
5 changed files with 52 additions and 63 deletions

View File

@ -9,27 +9,14 @@ use http::{
header::{self, HeaderName, HeaderValue}, header::{self, HeaderName, HeaderValue},
Method, Method,
}; };
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use percent_encoding::utf8_percent_encode;
use tracing::warn; use tracing::warn;
use super::{ use super::{
error::{IntoHttpError, UnknownVersionError}, error::{IntoHttpError, UnknownVersionError},
AuthScheme, SendAccessToken, AuthScheme, SendAccessToken,
}; };
use crate::{serde::slice_to_buf, RoomVersionId}; use crate::{percent_encode::PATH_PERCENT_ENCODE_SET, serde::slice_to_buf, RoomVersionId};
// The path percent-encode set as defined in the WHATWG URL standard
// <https://url.spec.whatwg.org/#path-percent-encode-set>
const PATH_PERCENT_ENCODE_SET: &AsciiSet = &CONTROLS
.add(b' ')
.add(b'"')
.add(b'#')
.add(b'<')
.add(b'>')
.add(b'?')
.add(b'`')
.add(b'{')
.add(b'}');
/// Metadata about an API endpoint. /// Metadata about an API endpoint.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View File

@ -2,7 +2,7 @@
use std::{fmt, str::FromStr}; use std::{fmt, str::FromStr};
use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, CONTROLS}; use percent_encoding::{percent_decode_str, percent_encode};
use ruma_identifiers_validation::{ use ruma_identifiers_validation::{
error::{MatrixIdError, MatrixToError, MatrixUriError}, error::{MatrixIdError, MatrixToError, MatrixUriError},
Error, Error,
@ -13,36 +13,10 @@ use super::{
EventId, OwnedEventId, OwnedRoomAliasId, OwnedRoomId, OwnedRoomOrAliasId, OwnedServerName, EventId, OwnedEventId, OwnedRoomAliasId, OwnedRoomId, OwnedRoomOrAliasId, OwnedServerName,
OwnedUserId, RoomAliasId, RoomId, RoomOrAliasId, UserId, OwnedUserId, RoomAliasId, RoomId, RoomOrAliasId, UserId,
}; };
use crate::{PrivOwnedStr, ServerName}; use crate::{percent_encode::PATH_PERCENT_ENCODE_SET, PrivOwnedStr, ServerName};
const MATRIX_TO_BASE_URL: &str = "https://matrix.to/#/"; const MATRIX_TO_BASE_URL: &str = "https://matrix.to/#/";
const MATRIX_SCHEME: &str = "matrix"; const MATRIX_SCHEME: &str = "matrix";
// Controls + Space + non-path characters from RFC 3986. In practice only the
// non-path characters will be encountered most likely, but better be safe.
// https://datatracker.ietf.org/doc/html/rfc3986/#page-23
const NON_PATH: &AsciiSet = &CONTROLS.add(b'/').add(b'?').add(b'#').add(b'[').add(b']');
// Controls + Space + reserved characters from RFC 3986. In practice only the
// reserved characters will be encountered most likely, but better be safe.
// https://datatracker.ietf.org/doc/html/rfc3986/#page-13
const RESERVED: &AsciiSet = &CONTROLS
.add(b':')
.add(b'/')
.add(b'?')
.add(b'#')
.add(b'[')
.add(b']')
.add(b'@')
.add(b'!')
.add(b'$')
.add(b'&')
.add(b'\'')
.add(b'(')
.add(b')')
.add(b'*')
.add(b'+')
.add(b',')
.add(b';')
.add(b'=');
/// All Matrix Identifiers that can be represented as a Matrix URI. /// All Matrix Identifiers that can be represented as a Matrix URI.
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
@ -153,15 +127,19 @@ impl MatrixId {
/// a slash. /// a slash.
pub(crate) fn to_string_with_sigil(&self) -> String { pub(crate) fn to_string_with_sigil(&self) -> String {
match self { match self {
Self::Room(room_id) => percent_encode(room_id.as_bytes(), RESERVED).to_string(), Self::Room(room_id) => {
Self::RoomAlias(room_alias) => { percent_encode(room_id.as_bytes(), PATH_PERCENT_ENCODE_SET).to_string()
percent_encode(room_alias.as_bytes(), RESERVED).to_string() }
Self::RoomAlias(room_alias) => {
percent_encode(room_alias.as_bytes(), PATH_PERCENT_ENCODE_SET).to_string()
}
Self::User(user_id) => {
percent_encode(user_id.as_bytes(), PATH_PERCENT_ENCODE_SET).to_string()
} }
Self::User(user_id) => percent_encode(user_id.as_bytes(), RESERVED).to_string(),
Self::Event(room_id, event_id) => format!( Self::Event(room_id, event_id) => format!(
"{}/{}", "{}/{}",
percent_encode(room_id.as_bytes(), RESERVED), percent_encode(room_id.as_bytes(), PATH_PERCENT_ENCODE_SET),
percent_encode(event_id.as_bytes(), RESERVED), percent_encode(event_id.as_bytes(), PATH_PERCENT_ENCODE_SET),
), ),
} }
} }
@ -176,21 +154,27 @@ impl MatrixId {
pub(crate) fn to_string_with_type(&self) -> String { pub(crate) fn to_string_with_type(&self) -> String {
match self { match self {
Self::Room(room_id) => { Self::Room(room_id) => {
format!("roomid/{}", percent_encode(&room_id.as_bytes()[1..], NON_PATH)) format!(
"roomid/{}",
percent_encode(&room_id.as_bytes()[1..], PATH_PERCENT_ENCODE_SET)
)
} }
Self::RoomAlias(room_alias) => { Self::RoomAlias(room_alias) => {
format!("r/{}", percent_encode(&room_alias.as_bytes()[1..], NON_PATH)) format!(
"r/{}",
percent_encode(&room_alias.as_bytes()[1..], PATH_PERCENT_ENCODE_SET)
)
} }
Self::User(user_id) => { Self::User(user_id) => {
format!("u/{}", percent_encode(&user_id.as_bytes()[1..], NON_PATH)) format!("u/{}", percent_encode(&user_id.as_bytes()[1..], PATH_PERCENT_ENCODE_SET))
} }
Self::Event(room_id, event_id) => { Self::Event(room_id, event_id) => {
let room_type = if room_id.is_room_id() { "roomid" } else { "r" }; let room_type = if room_id.is_room_id() { "roomid" } else { "r" };
format!( format!(
"{}/{}/e/{}", "{}/{}/e/{}",
room_type, room_type,
percent_encode(&room_id.as_bytes()[1..], NON_PATH), percent_encode(&room_id.as_bytes()[1..], PATH_PERCENT_ENCODE_SET),
percent_encode(&event_id.as_bytes()[1..], NON_PATH), percent_encode(&event_id.as_bytes()[1..], PATH_PERCENT_ENCODE_SET),
) )
} }
} }
@ -572,33 +556,33 @@ mod tests {
fn display_matrixtouri() { fn display_matrixtouri() {
assert_eq!( assert_eq!(
user_id!("@jplatte:notareal.hs").matrix_to_uri().to_string(), user_id!("@jplatte:notareal.hs").matrix_to_uri().to_string(),
"https://matrix.to/#/%40jplatte%3Anotareal.hs" "https://matrix.to/#/@jplatte:notareal.hs"
); );
assert_eq!( assert_eq!(
room_alias_id!("#ruma:notareal.hs").matrix_to_uri().to_string(), room_alias_id!("#ruma:notareal.hs").matrix_to_uri().to_string(),
"https://matrix.to/#/%23ruma%3Anotareal.hs" "https://matrix.to/#/%23ruma:notareal.hs"
); );
assert_eq!( assert_eq!(
room_id!("!ruma:notareal.hs").matrix_to_uri().to_string(), room_id!("!ruma:notareal.hs").matrix_to_uri().to_string(),
"https://matrix.to/#/%21ruma%3Anotareal.hs" "https://matrix.to/#/!ruma:notareal.hs"
); );
assert_eq!( assert_eq!(
room_id!("!ruma:notareal.hs") room_id!("!ruma:notareal.hs")
.matrix_to_uri_via(vec![server_name!("notareal.hs")]) .matrix_to_uri_via(vec![server_name!("notareal.hs")])
.to_string(), .to_string(),
"https://matrix.to/#/%21ruma%3Anotareal.hs?via=notareal.hs" "https://matrix.to/#/!ruma:notareal.hs?via=notareal.hs"
); );
assert_eq!( assert_eq!(
room_alias_id!("#ruma:notareal.hs") room_alias_id!("#ruma:notareal.hs")
.matrix_to_event_uri(event_id!("$event:notareal.hs")) .matrix_to_event_uri(event_id!("$event:notareal.hs"))
.to_string(), .to_string(),
"https://matrix.to/#/%23ruma%3Anotareal.hs/%24event%3Anotareal.hs" "https://matrix.to/#/%23ruma:notareal.hs/$event:notareal.hs"
); );
assert_eq!( assert_eq!(
room_id!("!ruma:notareal.hs") room_id!("!ruma:notareal.hs")
.matrix_to_event_uri(event_id!("$event:notareal.hs")) .matrix_to_event_uri(event_id!("$event:notareal.hs"))
.to_string(), .to_string(),
"https://matrix.to/#/%21ruma%3Anotareal.hs/%24event%3Anotareal.hs" "https://matrix.to/#/!ruma:notareal.hs/$event:notareal.hs"
); );
assert_eq!( assert_eq!(
room_id!("!ruma:notareal.hs") room_id!("!ruma:notareal.hs")
@ -607,7 +591,7 @@ mod tests {
vec![server_name!("notareal.hs")] vec![server_name!("notareal.hs")]
) )
.to_string(), .to_string(),
"https://matrix.to/#/%21ruma%3Anotareal.hs/%24event%3Anotareal.hs?via=notareal.hs" "https://matrix.to/#/!ruma:notareal.hs/$event:notareal.hs?via=notareal.hs"
); );
} }

View File

@ -55,7 +55,7 @@ impl RoomId {
/// ///
/// assert_eq!( /// assert_eq!(
/// room_id!("!somewhere:example.org").matrix_to_uri().to_string(), /// room_id!("!somewhere:example.org").matrix_to_uri().to_string(),
/// "https://matrix.to/#/%21somewhere%3Aexample.org" /// "https://matrix.to/#/!somewhere:example.org"
/// ); /// );
/// ``` /// ```
pub fn matrix_to_uri(&self) -> MatrixToUri { pub fn matrix_to_uri(&self) -> MatrixToUri {
@ -77,7 +77,7 @@ impl RoomId {
/// room_id!("!somewhere:example.org") /// room_id!("!somewhere:example.org")
/// .matrix_to_uri_via([&*server_name!("example.org"), &*server_name!("alt.example.org")]) /// .matrix_to_uri_via([&*server_name!("example.org"), &*server_name!("alt.example.org")])
/// .to_string(), /// .to_string(),
/// "https://matrix.to/#/%21somewhere%3Aexample.org?via=example.org&via=alt.example.org" /// "https://matrix.to/#/!somewhere:example.org?via=example.org&via=alt.example.org"
/// ); /// );
/// ``` /// ```
/// ///

View File

@ -27,6 +27,7 @@ pub mod encryption;
#[cfg(feature = "events")] #[cfg(feature = "events")]
pub mod events; pub mod events;
mod identifiers; mod identifiers;
mod percent_encode;
pub mod power_levels; pub mod power_levels;
pub mod presence; pub mod presence;
pub mod push; pub mod push;

View File

@ -0,0 +1,17 @@
use percent_encoding::{AsciiSet, CONTROLS};
/// Set of ASCII bytes that get escaped when percent-encoding one path segment.
///
/// It is built from the [path percent-encode set] described in the WHATWG URL
/// standard, with `/` added on top: values are always encoded as a single
/// segment of the path, so a slash inside a value is escaped as well.
///
/// [path percent-encode set]: https://url.spec.whatwg.org/#path-percent-encode-set
pub(crate) const PATH_PERCENT_ENCODE_SET: &AsciiSet = &CONTROLS
    // Addition on top of the standard set, because only single segments are encoded.
    .add(b'/')
    // Members taken from the WHATWG path percent-encode set.
    .add(b' ')
    .add(b'"')
    .add(b'#')
    .add(b'<')
    .add(b'>')
    .add(b'?')
    .add(b'`')
    .add(b'{')
    .add(b'}');