Merge a bunch of identifier changes from upstream

Signed-off-by: strawberry <june@girlboss.ceo>
This commit is contained in:
strawberry 2025-02-16 16:29:46 -05:00
parent 6770d56166
commit bb2b381cad
No known key found for this signature in database
10 changed files with 221 additions and 67 deletions

View file

@ -36,10 +36,6 @@ unstable-unspecified = []
# Don't validate `ServerSigningKeyVersion`.
compat-server-signing-key-version = ["ruma-identifiers-validation/compat-server-signing-key-version"]
# Allow some user IDs that are invalid even with the specified historical
# user ID scheme.
compat-user-id = ["ruma-identifiers-validation/compat-user-id"]
# Allow some mandatory fields to be missing, defaulting them to an empty string
# in deserialization.
compat-empty-string-null = []

View file

@ -28,9 +28,7 @@ pub struct RoomId(str);
impl RoomId {
/// Attempts to generate a `RoomId` for the given origin server with a localpart consisting of
/// 18 random ASCII characters.
///
/// Fails if the given homeserver cannot be parsed as a valid host.
/// 18 random ASCII characters, as recommended in the spec.
#[cfg(feature = "rand")]
#[allow(clippy::new_ret_no_self)]
pub fn new(server_name: &ServerName) -> OwnedRoomId {

View file

@ -2,6 +2,10 @@
use std::{rc::Rc, sync::Arc};
pub use ruma_identifiers_validation::user_id::localpart_is_fully_conforming;
use ruma_identifiers_validation::{localpart_is_backwards_compatible, MAX_BYTES};
use ruma_macros::IdZst;
use super::{matrix_uri::UriAction, IdParseError, MatrixToUri, MatrixUri, ServerName};
/// A Matrix [user ID].
@ -53,7 +57,7 @@ impl UserId {
if id_str.starts_with('@') {
Self::parse_into_owned(id.into())
} else {
let _ = localpart_is_fully_conforming(id_str)?;
localpart_is_backwards_compatible(id_str)?;
Ok(Self::from_borrowed(&format!("@{id_str}:{server_name}")).to_owned())
}
}
@ -70,7 +74,7 @@ impl UserId {
if id_str.starts_with('@') {
Self::parse_rc(id)
} else {
let _ = localpart_is_fully_conforming(id_str)?;
localpart_is_backwards_compatible(id_str)?;
Ok(Self::from_rc(format!("@{id_str}:{server_name}").into()))
}
}
@ -87,7 +91,7 @@ impl UserId {
if id_str.starts_with('@') {
Self::parse_arc(id)
} else {
let _ = localpart_is_fully_conforming(id_str)?;
localpart_is_backwards_compatible(id_str)?;
Ok(Self::from_arc(format!("@{id_str}:{server_name}").into()))
}
}
@ -102,13 +106,59 @@ impl UserId {
ServerName::from_borrowed(&self.as_str()[self.colon_idx() + 1..])
}
/// Checks this user ID against both the strict and the historical grammar in one pass.
///
/// Yields `Ok(true)` when the user ID is fully conforming, `Ok(false)` when it is only valid as
/// a historical user ID, and an `Err` when the user ID is invalid.
fn validate_fully_conforming(&self) -> Result<bool, IdParseError> {
    // The length check can be disabled with `compat-arbitrary-length-ids`, so the limit is
    // enforced once more before the localpart is inspected.
    let within_limit = self.as_bytes().len() <= MAX_BYTES;
    if within_limit {
        localpart_is_fully_conforming(self.localpart())
    } else {
        Err(IdParseError::MaximumLengthExceeded)
    }
}
/// Checks that this user ID satisfies the [strict grammar].
///
/// Historical user IDs are deprecated, so newly created user IDs should be validated with this
/// method.
///
/// [strict grammar]: https://spec.matrix.org/latest/appendices/#user-identifiers
pub fn validate_strict(&self) -> Result<(), IdParseError> {
    let fully_conforming = self.validate_fully_conforming()?;
    // A user ID that only passes as historical is rejected by the strict check.
    fully_conforming.then_some(()).ok_or(IdParseError::InvalidCharacters)
}
/// Checks that this user ID satisfies the [historical grammar].
///
/// According to the spec, servers should run this check on user IDs found in events received
/// over federation, and should not forward events that fail it to their users.
///
/// Unlike [`UserId::is_historical()`], this method also accepts user IDs that conform to the
/// latest grammar.
///
/// [historical grammar]: https://spec.matrix.org/latest/appendices/#historical-user-ids
pub fn validate_historical(&self) -> Result<(), IdParseError> {
    // Only the error matters here: fully conforming and historical user IDs both pass.
    self.validate_fully_conforming().map(|_is_fully_conforming| ())
}
/// Whether this user ID is a historical one.
///
/// A historical user ID is one that doesn't conform to the latest specification of the user ID
/// grammar but is still accepted because it was previously allowed.
/// A [historical user ID] is one that doesn't conform to the latest specification of the user
/// ID grammar but is still accepted because it was previously allowed.
///
/// [historical user ID]: https://spec.matrix.org/latest/appendices/#historical-user-ids
#[inline]
pub fn is_historical(&self) -> bool {
!localpart_is_fully_conforming(self.localpart()).unwrap()
self.validate_fully_conforming().is_ok_and(|is_fully_conforming| !is_fully_conforming)
}
/// Create a `matrix.to` URI for this user ID.
@ -155,9 +205,6 @@ impl UserId {
}
}
pub use ruma_identifiers_validation::user_id::localpart_is_fully_conforming;
use ruma_macros::IdZst;
#[cfg(test)]
mod tests {
use super::{OwnedUserId, UserId};
@ -170,6 +217,8 @@ mod tests {
assert_eq!(user_id.localpart(), "carl");
assert_eq!(user_id.server_name(), "example.com");
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap();
}
#[test]
@ -181,6 +230,8 @@ mod tests {
assert_eq!(user_id.localpart(), "carl");
assert_eq!(user_id.server_name(), "example.com");
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap();
}
#[test]
@ -192,24 +243,87 @@ mod tests {
assert_eq!(user_id.localpart(), "carl");
assert_eq!(user_id.server_name(), "example.com");
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap();
}
#[cfg(not(feature = "compat-user-id"))]
#[test]
fn invalid_user_id() {
fn backwards_compatible_user_id() {
let localpart = "τ";
let user_id = "@τ:example.com";
let user_id_str = "@τ:example.com";
let server_name = server_name!("example.com");
<&UserId>::try_from(user_id).unwrap_err();
UserId::parse_with_server_name(user_id, server_name).unwrap_err();
UserId::parse_with_server_name(localpart, server_name).unwrap_err();
UserId::parse_with_server_name_rc(user_id, server_name).unwrap_err();
UserId::parse_with_server_name_rc(localpart, server_name).unwrap_err();
UserId::parse_with_server_name_arc(user_id, server_name).unwrap_err();
UserId::parse_with_server_name_arc(localpart, server_name).unwrap_err();
UserId::parse_rc(user_id).unwrap_err();
UserId::parse_arc(user_id).unwrap_err();
let user_id = <&UserId>::try_from(user_id_str).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name(user_id_str, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name(localpart, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name_rc(user_id_str, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name_rc(localpart, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name_arc(user_id_str, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_with_server_name_arc(localpart, server_name).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_rc(user_id_str).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
let user_id = UserId::parse_arc(user_id_str).unwrap();
assert_eq!(user_id.as_str(), user_id_str);
assert_eq!(user_id.localpart(), localpart);
assert_eq!(user_id.server_name(), server_name);
assert!(!user_id.is_historical());
user_id.validate_historical().unwrap_err();
user_id.validate_strict().unwrap_err();
}
#[test]
@ -225,6 +339,8 @@ mod tests {
assert_eq!(user_id.localpart(), "a%b[irc]");
assert_eq!(user_id.server_name(), "example.com");
assert!(user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap_err();
}
#[test]
@ -236,6 +352,8 @@ mod tests {
assert_eq!(user_id.localpart(), "a%b[irc]");
assert_eq!(user_id.server_name(), "example.com");
assert!(user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap_err();
}
#[test]
@ -247,6 +365,8 @@ mod tests {
assert_eq!(user_id.localpart(), "a%b[irc]");
assert_eq!(user_id.server_name(), "example.com");
assert!(user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap_err();
}
#[test]
@ -254,6 +374,8 @@ mod tests {
let user_id = <&UserId>::try_from("@CARL:example.com").expect("Failed to create UserId.");
assert_eq!(user_id.as_str(), "@CARL:example.com");
assert!(user_id.is_historical());
user_id.validate_historical().unwrap();
user_id.validate_strict().unwrap_err();
}
#[cfg(feature = "rand")]
@ -309,12 +431,10 @@ mod tests {
}
#[test]
#[cfg(not(feature = "compat-user-id"))]
fn invalid_characters_in_user_id_localpart() {
assert_eq!(
<&UserId>::try_from("@te\nst:example.com").unwrap_err(),
IdParseError::InvalidCharacters
);
let user_id = <&UserId>::try_from("@te\nst:example.com").unwrap();
assert_eq!(user_id.validate_historical().unwrap_err(), IdParseError::InvalidCharacters);
assert_eq!(user_id.validate_strict().unwrap_err(), IdParseError::InvalidCharacters);
}
#[test]

View file

@ -15,10 +15,6 @@ all-features = true
# Don't validate the version in `server_signing_key_version::validate`.
compat-server-signing-key-version = []
# Allow some user IDs that are invalid even with the specified historical
# user ID scheme.
compat-user-id = []
[dependencies]
js_int = { workspace = true }
thiserror = { workspace = true }

View file

@ -53,3 +53,18 @@ pub trait KeyName: AsRef<str> {
/// Validate the given string for this name.
fn validate(s: &str) -> Result<(), Error>;
}
/// Check whether the Matrix identifier localpart is [allowed over federation].
///
/// According to the spec, localparts can consist of any legal non-surrogate Unicode code points
/// except for `:` and `NUL` (`U+0000`).
///
/// [allowed over federation]: https://spec.matrix.org/latest/appendices/#historical-user-ids
pub fn localpart_is_backwards_compatible(localpart: &str) -> Result<(), Error> {
let is_invalid = localpart.contains([':', '\0']);
if is_invalid {
Err(Error::InvalidCharacters)
} else {
Ok(())
}
}

View file

@ -1,5 +1,13 @@
use crate::{validate_delimited_id, Error};
use crate::{localpart_is_backwards_compatible, parse_id, Error};
/// Validate a [room alias] as used by clients and servers.
///
/// [room alias]: https://spec.matrix.org/latest/appendices/#room-aliases
pub fn validate(s: &str) -> Result<(), Error> {
validate_delimited_id(s, b'#')
let colon_idx = parse_id(s, b'#')?;
let localpart = &s[1..colon_idx];
localpart_is_backwards_compatible(localpart)?;
Ok(())
}

View file

@ -1,5 +1,15 @@
use crate::{validate_id, Error};
/// Validate a [room ID] as used by clients.
///
/// [room ID]: https://spec.matrix.org/latest/appendices/#room-ids
pub fn validate(s: &str) -> Result<(), Error> {
validate_id(s, b'!')
validate_id(s, b'!')?;
// Since we cannot check the localpart, check at least the NUL byte.
if s.as_bytes().contains(&b'\0') {
return Err(Error::InvalidCharacters);
}
Ok(())
}

View file

@ -1,9 +1,33 @@
use crate::{parse_id, Error};
use crate::{localpart_is_backwards_compatible, parse_id, Error, MAX_BYTES};
/// Validate a [user ID] as used by clients.
///
/// [user ID]: https://spec.matrix.org/latest/appendices/#user-identifiers
pub fn validate(s: &str) -> Result<(), Error> {
let colon_idx = parse_id(s, b'@')?;
let localpart = &s[1..colon_idx];
let _ = localpart_is_fully_conforming(localpart)?;
localpart_is_backwards_compatible(localpart)?;
Ok(())
}
/// Validate a [user ID] to follow the spec recommendations when generating them.
///
/// [user ID]: https://spec.matrix.org/latest/appendices/#user-identifiers
pub fn validate_strict(s: &str) -> Result<(), Error> {
// Since the length check can be disabled with `compat-arbitrary-length-ids`, check it again
// here.
if s.len() > MAX_BYTES {
return Err(Error::MaximumLengthExceeded);
}
let colon_idx = parse_id(s, b'@')?;
let localpart = &s[1..colon_idx];
if !localpart_is_fully_conforming(localpart)? {
return Err(Error::InvalidCharacters);
}
Ok(())
}
@ -12,33 +36,23 @@ pub fn validate(s: &str) -> Result<(), Error> {
///
/// Returns an `Err` for invalid user ID localparts, `Ok(false)` for historical user ID localparts
/// and `Ok(true)` for fully conforming user ID localparts.
///
/// With the `compat` feature enabled, this will also return `Ok(false)` for invalid user ID
/// localparts. User IDs that don't even meet the historical user ID restrictions exist in the wild
/// due to Synapse allowing them over federation. This will likely be fixed in an upcoming room
/// version; see [MSC2828](https://github.com/matrix-org/matrix-spec-proposals/pull/2828).
pub fn localpart_is_fully_conforming(localpart: &str) -> Result<bool, Error> {
if localpart.is_empty() {
return Err(Error::Empty);
}
// See https://spec.matrix.org/latest/appendices/#user-identifiers
let is_fully_conforming = !localpart.is_empty()
&& localpart.bytes().all(
|b| matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'-' | b'.' | b'=' | b'_' | b'/' | b'+'),
);
let is_fully_conforming = localpart
.bytes()
.all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'-' | b'.' | b'=' | b'_' | b'/' | b'+'));
if !is_fully_conforming {
// If it's not fully conforming, check if it contains characters that are also disallowed
// for historical user IDs, or is empty. If that's the case, return an error.
// See https://spec.matrix.org/latest/appendices/#historical-user-ids
#[cfg(not(feature = "compat-user-id"))]
let is_invalid =
localpart.is_empty() || localpart.bytes().any(|b| b < 0x21 || b == b':' || b > 0x7E);
let is_invalid_historical = localpart.bytes().any(|b| b < 0x21 || b == b':' || b > 0x7E);
// In compat mode, allow anything except `:` to match Synapse. The `:` check is only needed
// because this function can be called through `UserId::parse_with_servername`, otherwise
// it would be impossible for the input to contain a `:`.
#[cfg(feature = "compat-user-id")]
let is_invalid = localpart.as_bytes().contains(&b':');
if is_invalid {
if is_invalid_historical {
return Err(Error::InvalidCharacters);
}
}

View file

@ -274,10 +274,12 @@ pub fn mxc_uri(input: TokenStream) -> TokenStream {
}
/// Compile-time checked `UserId` construction.
///
/// The user ID is validated using the same rules as `UserId::validate_strict()`.
#[proc_macro]
pub fn user_id(input: TokenStream) -> TokenStream {
let IdentifierInput { dollar_crate, id } = parse_macro_input!(input as IdentifierInput);
assert!(user_id::validate(&id.value()).is_ok(), "Invalid user_id");
assert!(user_id::validate_strict(&id.value()).is_ok(), "Invalid user_id");
let output = quote! {
<&#dollar_crate::UserId as ::std::convert::TryFrom<&str>>::try_from(#id).unwrap()

View file

@ -134,7 +134,6 @@ full = [
# Enable all compatibility hacks. Deprecated.
compat = [
"compat-server-signing-key-version",
"compat-user-id",
"compat-empty-string-null",
"compat-null",
"compat-optional",
@ -147,10 +146,6 @@ compat = [
# Don't validate `ServerSigningKeyVersion`.
compat-server-signing-key-version = ["ruma-common/compat-server-signing-key-version"]
# Allow some user IDs that are invalid even with the specified historical
# user ID scheme.
compat-user-id = ["ruma-common/compat-user-id"]
# Allow some mandatory fields in requests / responses to be missing, defaulting
# them to an empty string in deserialization.
compat-empty-string-null = [