From bab06ed3759d182be55482d8964a4020c532de92 Mon Sep 17 00:00:00 2001 From: Charles Hall Date: Wed, 27 Nov 2024 20:35:00 -0800 Subject: [PATCH] stateres: make separate O(n) instead of O(n^2) This way my poor CPU only has to do ~600,000 iterations to resolve Matrix HQ from scratch. The old algorithm required ~85,000,000,000. As a treat, we can also drop the dependency on itertools. --- crates/ruma-state-res/Cargo.toml | 1 - crates/ruma-state-res/src/lib.rs | 43 +++++++++++++++++++------------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/crates/ruma-state-res/Cargo.toml b/crates/ruma-state-res/Cargo.toml index 29a32875..b22f0927 100644 --- a/crates/ruma-state-res/Cargo.toml +++ b/crates/ruma-state-res/Cargo.toml @@ -19,7 +19,6 @@ unstable-exhaustive-types = [] [dependencies] futures-util = "0.3" -itertools = "0.13.0" js_int = { workspace = true } ruma-common = { workspace = true, features = ["api"] } ruma-events = { workspace = true } diff --git a/crates/ruma-state-res/src/lib.rs b/crates/ruma-state-res/src/lib.rs index 93d2cb22..09898a22 100644 --- a/crates/ruma-state-res/src/lib.rs +++ b/crates/ruma-state-res/src/lib.rs @@ -6,7 +6,6 @@ use std::{ }; use futures_util::{future, stream, Future, StreamExt}; -use itertools::Itertools; use js_int::{int, Int}; use ruma_common::{EventId, MilliSecondsSinceUnixEpoch, RoomVersionId}; use ruma_events::{ @@ -177,27 +176,32 @@ where /// not exactly one event ID. This includes missing events, if one state_set includes an event that /// none of the other have this is a conflicting event. 
fn separate<'a, Id>( - state_sets_iter: impl Iterator<Item = &'a StateMap<Id>> + Clone, + state_sets_iter: impl Iterator<Item = &'a StateMap<Id>>, ) -> (StateMap<Id>, StateMap<Vec<Id>>) where - Id: Clone + Eq + 'a, + Id: Clone + Eq + Hash + 'a, { + let mut state_set_count = 0_usize; + let mut occurrences = HashMap::<_, HashMap<_, _>>::new(); + + let state_sets_iter = state_sets_iter.inspect(|_| state_set_count += 1); + for (k, v) in state_sets_iter.flatten() { + occurrences.entry(k).or_default().entry(v).and_modify(|x| *x += 1).or_insert(1); + } + let mut unconflicted_state = StateMap::new(); let mut conflicted_state = StateMap::new(); - for key in state_sets_iter.clone().flat_map(|map| map.keys()).unique() { - let mut event_ids = - state_sets_iter.clone().map(|state_set| state_set.get(key)).collect::<Vec<_>>(); - - if event_ids.iter().all_equal() { - // First .unwrap() is okay because - // * event_ids has the same length as state_sets - // * we never enter the loop this code is in if state_sets is empty - let id = event_ids.pop().unwrap().expect("unconflicting `EventId` is not None"); - unconflicted_state.insert(key.clone(), id.clone()); - } else { - conflicted_state - .insert(key.clone(), event_ids.into_iter().filter_map(|o| o.cloned()).collect()); + for (k, v) in occurrences { + for (id, occurrence_count) in v { + if occurrence_count == state_set_count { + unconflicted_state.insert((k.0.clone(), k.1.clone()), id.clone()); + } else { + conflicted_state + .entry((k.0.clone(), k.1.clone())) + .and_modify(|x: &mut Vec<_>| x.push(id.clone())) + .or_insert(vec![id.clone()]); + } } } @@ -1414,7 +1418,7 @@ mod tests { #[test] fn separate_conflicted() { - let (unconflicted, conflicted) = super::separate( + let (unconflicted, mut conflicted) = super::separate( [ state_set![StateEventType::RoomMember => "@a:hs1" => 0], state_set![StateEventType::RoomMember => "@a:hs1" => 1], @@ -1423,6 +1427,11 @@ mod tests { .iter(), ); + // HashMap iteration order is random, so sort this before asserting on it + for v in conflicted.values_mut() { 
v.sort_unstable(); + } + assert_eq!(unconflicted, StateMap::new()); assert_eq!( conflicted,