diff --git a/compiler/cl-structures/src/arena/global_intern.rs b/compiler/cl-structures/src/arena/global_intern.rs deleted file mode 100644 index 43ace68..0000000 --- a/compiler/cl-structures/src/arena/global_intern.rs +++ /dev/null @@ -1,130 +0,0 @@ -//! A global intern pool for strings, represented by the [Sym] symbol - -use super::{intern::Interner, symbol::Symbol}; -use std::{ - fmt::Display, - num::NonZeroU32, - sync::{OnceLock, RwLock}, -}; - -/// Holds a globally accessible [Interner] which uses [Sym] as its [Symbol] -static GLOBAL_INTERNER: OnceLock>> = OnceLock::new(); - -/// A unique identifier corresponding to a particular globally-interned [String]. -/// -/// Copies of that string can be obtained with [Sym::get] or [String::try_from]. -/// -/// New strings can be interned with [Sym::new] or [Sym::from] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct Sym(NonZeroU32); - -impl Sym { - /// Gets the interned [Sym] for the given value, or interns a new one. - /// - /// # Blocks - /// This conversion blocks if the Global Interner lock is held. - /// - /// # May Panic - /// Panics if the Global Interner's lock has been poisoned by a panic in another thread - pub fn new(value: &str) -> Self { - GLOBAL_INTERNER - .get_or_init(Default::default) - .write() - .expect("global interner should not be poisoned in another thread") - .get_or_insert(value) - } - /// Gets a [Sym] associated with the given string, if one already exists - pub fn try_from_str(value: &str) -> Option { - GLOBAL_INTERNER.get()?.read().ok()?.get(value) - } - - /// Gets a copy of the value of the [Sym] - // TODO: Make this copy-less - pub fn get(self) -> Option { - String::try_from(self).ok() - } - - /// Looks up the string associated with this [Sym], - /// and performs a transformation on it if it exists. - pub fn map(&self, f: impl Fn(&str) -> T) -> Option { - Some(f(GLOBAL_INTERNER.get()?.read().ok()?.get_str(*self)?)) - } -} - -impl Display for Sym { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Some(interner) = GLOBAL_INTERNER.get() else { - return write!(f, "[sym@{} (uninitialized)]", self.0); - }; - let Ok(interner) = interner.read() else { - return write!(f, "[sym@{} (poisoned)]", self.0); - }; - let Some(str) = interner.get_str(*self) else { - return write!(f, "[sym@{} (invalid)]", self.0); - }; - str.fmt(f) - } -} - -impl Symbol for Sym { - const MAX: usize = u32::MAX as usize - 1; - fn try_from_usize(value: usize) -> Option { - Some(Self(NonZeroU32::try_from_usize(value)?)) - } - fn into_usize(self) -> usize { - self.0.into_usize() - } -} - -impl> From for Sym { - /// Converts to this type from the input type. - /// - /// # Blocks - /// This conversion blocks if the Global Interner lock is held. - /// - /// # May Panic - /// Panics if the Global Interner's lock has been poisoned by a panic in another thread - fn from(value: T) -> Self { - Self::new(value.as_ref()) - } -} - -impl TryFrom for String { - type Error = SymError; - - fn try_from(value: Sym) -> Result { - let Some(interner) = GLOBAL_INTERNER.get() else { - Err(SymError::Uninitialized)? - }; - let Ok(interner) = interner.write() else { - Err(SymError::Poisoned)? - }; - match interner.get_str(value) { - None => Err(SymError::Unseen(value)), - Some(string) => Ok(string.into()), - } - } -} - -/// Describes an error in [Sym] to [String] lookup -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum SymError { - Uninitialized, - Poisoned, - Unseen(Sym), -} -impl std::error::Error for SymError {} -impl Display for SymError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SymError::Uninitialized => "String pool was not initialized".fmt(f), - SymError::Poisoned => "String pool was held by panicking thread".fmt(f), - SymError::Unseen(sym) => { - write!(f, "Symbol {sym:?} not present in String pool") - } - } - } -} - -#[cfg(test)] -mod tests; diff --git a/compiler/cl-structures/src/arena/global_intern/tests.rs b/compiler/cl-structures/src/arena/global_intern/tests.rs deleted file mode 100644 index ea10ea6..0000000 --- a/compiler/cl-structures/src/arena/global_intern/tests.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Tests for the global intern pool -use super::*; - -#[test] -fn globalsym_from_returns_unique_value_for_unique_keys() { - let foo_bar = Sym::from("foo_bar"); - let foo_baz = Sym::from("foo_baz"); - assert_ne!(foo_bar, foo_baz); - assert_eq!(foo_bar, Sym::from("foo_bar")); - assert_eq!(foo_baz, Sym::from("foo_baz")); -} -#[test] -fn try_from_str_returns_none_before_init() { - if let Some(value) = Sym::try_from_str("") { - panic!("{value}") - } -} -#[test] -fn try_from_str_returns_some_when_key_exists() { - let _ = Sym::from("foo_bar"); - assert!(dbg!(Sym::try_from_str("foo_bar")).is_some()); -} - -#[test] -fn try_from_str_returns_the_same_thing_as_globalsym_from() { - let foo_bar = Sym::from("foo_bar"); - assert_eq!(Some(foo_bar), Sym::try_from_str("foo_bar")); -} - -#[test] -fn map_works() { - let foo_bar = Sym::from("foo_bar"); - assert!(foo_bar.map(|sym| "foo_bar" == sym).unwrap()); -} diff --git a/compiler/cl-structures/src/arena/intern.rs b/compiler/cl-structures/src/arena/intern.rs deleted file mode 100644 index 139d22d..0000000 --- a/compiler/cl-structures/src/arena/intern.rs +++ /dev/null @@ -1,49 +0,0 @@ -//! A string interner with deduplication - -use super::{string_arena::StringArena, symbol::Symbol}; -use hashbrown::hash_table::HashTable; -use std::hash::{BuildHasher, RandomState}; - -#[derive(Debug)] -pub struct Interner { - set: HashTable, - arena: StringArena, - hasher: H, -} - -impl Default for Interner { - fn default() -> Self { - Self { set: Default::default(), arena: Default::default(), hasher: Default::default() } - } -} - -impl Interner { - pub fn get_or_insert(&mut self, s: &str) -> Sym { - let Self { set: map, arena, hasher } = self; - let hash = hasher.hash_one(s); - *map.entry(hash, is_match(s, arena), |t| { - hasher.hash_one(arena.get(*t).unwrap()) - }) - .or_insert_with(|| arena.push_string(s)) - .get() - } - - pub fn get(&self, s: &str) -> Option { - let Self { set: map, arena, hasher } = self; - map.find(hasher.hash_one(s), is_match(s, arena)).copied() - } - - pub fn get_str(&self, sym: Sym) -> Option<&str> { - self.arena.get(sym) - } -} - -fn is_match<'a, Sym: Symbol>( - s: &'a str, - arena: &'a StringArena, -) -> impl Fn(&Sym) -> bool + 'a { - move |sym| match arena.get(*sym) { - Some(sym) => sym == s, - None => false, - } -} diff --git a/compiler/cl-structures/src/arena/string_arena.rs b/compiler/cl-structures/src/arena/string_arena.rs deleted file mode 100644 index 38ace17..0000000 --- a/compiler/cl-structures/src/arena/string_arena.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Compactly stores a set of immutable strings, producing a [Symbol] for each one -use super::symbol::Symbol; -use std::marker::PhantomData; -/// Compactly stores a set of immutable strings, producing a [Symbol] for each one -#[derive(Debug)] -pub struct StringArena { - ends: Vec, - buf: String, - _t: PhantomData, -} - -impl StringArena { - pub fn new() -> Self { - Default::default() - } - /// # May panic - /// Panics if Symbol::from_usize would panic - fn next_key(&self) -> T { - Symbol::from_usize(self.ends.len()) - } - - fn get_span(&self, key: T) -> Option<(usize, usize)> { - let key = key.into_usize(); - Some((*self.ends.get(key - 1)?, *self.ends.get(key)?)) - } - - pub fn get(&self, key: T) -> Option<&str> { - let (start, end) = self.get_span(key)?; - // Safety: start and end offsets were created by push_string - Some(unsafe { self.buf.get_unchecked(start..end) }) - } - - pub fn push_string(&mut self, s: &str) -> T { - if self.ends.is_empty() { - self.ends.push(self.buf.len()) - } - let key = self.next_key(); - self.buf.push_str(s); - self.ends.push(self.buf.len()); - key - } -} - -impl Default for StringArena { - fn default() -> Self { - Self { ends: Default::default(), buf: Default::default(), _t: PhantomData } - } -} diff --git a/compiler/cl-structures/src/arena/symbol.rs b/compiler/cl-structures/src/arena/symbol.rs deleted file mode 100644 index 0e05df6..0000000 --- a/compiler/cl-structures/src/arena/symbol.rs +++ /dev/null @@ -1,35 +0,0 @@ -use std::{fmt, hash, num::*}; - -pub trait Symbol: Copy + fmt::Debug + fmt::Display + Eq + hash::Hash { - /// The largest [`usize`] that may be stored in the [Symbol] - const MAX: usize; - /// Returns [`Some(Self)`](Some) if `value` is in range 0..=[Symbol::MAX] - fn try_from_usize(value: usize) -> Option; - /// # May Panic - /// May panic if `value` is not in range 0..=[Symbol::MAX] - fn from_usize(value: usize) -> Self { - Self::try_from_usize(value).expect("should be within MIN and MAX") - } - fn into_usize(self) -> usize; -} - -#[rustfmt::skip] -impl Symbol for usize { - const MAX: usize = usize::MAX; - fn try_from_usize(value: usize) -> Option { Some(value) } - fn into_usize(self) -> usize { self } -} - -macro_rules! impl_symbol_for_nonzero{($($int:ident: $nonzero:ident),* $(,)?) => {$( - impl Symbol for $nonzero { - const MAX: usize = $int::MAX as usize - 1; - fn try_from_usize(value: usize) -> Option { - $nonzero::try_from(value.wrapping_add(1) as $int).ok() - } - fn into_usize(self) -> usize { - self.get() as usize - 1 - } - } -)*}} - -impl_symbol_for_nonzero!(u8: NonZeroU8, u16: NonZeroU16, u32: NonZeroU32, u64: NonZeroU64, usize: NonZeroUsize);