cl-structures: Fully remove unused arenas v2 sources
This commit is contained in:
parent
12046fa9f7
commit
e7c5a02afa
@ -1,130 +0,0 @@
|
|||||||
//! A global intern pool for strings, represented by the [Sym] symbol
|
|
||||||
|
|
||||||
use super::{intern::Interner, symbol::Symbol};
|
|
||||||
use std::{
|
|
||||||
fmt::Display,
|
|
||||||
num::NonZeroU32,
|
|
||||||
sync::{OnceLock, RwLock},
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Holds a globally accessible [Interner] which uses [Sym] as its [Symbol]
|
|
||||||
static GLOBAL_INTERNER: OnceLock<RwLock<Interner<Sym>>> = OnceLock::new();
|
|
||||||
|
|
||||||
/// A small, copyable handle to a [String] held in the global intern pool.
///
/// Intern a string with [Sym::new] or [Sym::from]; read a copy of it back with
/// [Sym::get] or [String::try_from].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Sym(NonZeroU32);
|
|
||||||
|
|
||||||
impl Sym {
|
|
||||||
/// Gets the interned [Sym] for the given value, or interns a new one.
|
|
||||||
///
|
|
||||||
/// # Blocks
|
|
||||||
/// This conversion blocks if the Global Interner lock is held.
|
|
||||||
///
|
|
||||||
/// # May Panic
|
|
||||||
/// Panics if the Global Interner's lock has been poisoned by a panic in another thread
|
|
||||||
pub fn new(value: &str) -> Self {
|
|
||||||
GLOBAL_INTERNER
|
|
||||||
.get_or_init(Default::default)
|
|
||||||
.write()
|
|
||||||
.expect("global interner should not be poisoned in another thread")
|
|
||||||
.get_or_insert(value)
|
|
||||||
}
|
|
||||||
/// Gets a [Sym] associated with the given string, if one already exists
|
|
||||||
pub fn try_from_str(value: &str) -> Option<Self> {
|
|
||||||
GLOBAL_INTERNER.get()?.read().ok()?.get(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets a copy of the value of the [Sym]
|
|
||||||
// TODO: Make this copy-less
|
|
||||||
pub fn get(self) -> Option<String> {
|
|
||||||
String::try_from(self).ok()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Looks up the string associated with this [Sym],
|
|
||||||
/// and performs a transformation on it if it exists.
|
|
||||||
pub fn map<T>(&self, f: impl Fn(&str) -> T) -> Option<T> {
|
|
||||||
Some(f(GLOBAL_INTERNER.get()?.read().ok()?.get_str(*self)?))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for Sym {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
let Some(interner) = GLOBAL_INTERNER.get() else {
|
|
||||||
return write!(f, "[sym@{} (uninitialized)]", self.0);
|
|
||||||
};
|
|
||||||
let Ok(interner) = interner.read() else {
|
|
||||||
return write!(f, "[sym@{} (poisoned)]", self.0);
|
|
||||||
};
|
|
||||||
let Some(str) = interner.get_str(*self) else {
|
|
||||||
return write!(f, "[sym@{} (invalid)]", self.0);
|
|
||||||
};
|
|
||||||
str.fmt(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Symbol for Sym {
|
|
||||||
const MAX: usize = u32::MAX as usize - 1;
|
|
||||||
fn try_from_usize(value: usize) -> Option<Self> {
|
|
||||||
Some(Self(NonZeroU32::try_from_usize(value)?))
|
|
||||||
}
|
|
||||||
fn into_usize(self) -> usize {
|
|
||||||
self.0.into_usize()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: AsRef<str>> From<T> for Sym {
|
|
||||||
/// Converts to this type from the input type.
|
|
||||||
///
|
|
||||||
/// # Blocks
|
|
||||||
/// This conversion blocks if the Global Interner lock is held.
|
|
||||||
///
|
|
||||||
/// # May Panic
|
|
||||||
/// Panics if the Global Interner's lock has been poisoned by a panic in another thread
|
|
||||||
fn from(value: T) -> Self {
|
|
||||||
Self::new(value.as_ref())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TryFrom<Sym> for String {
|
|
||||||
type Error = SymError;
|
|
||||||
|
|
||||||
fn try_from(value: Sym) -> Result<Self, Self::Error> {
|
|
||||||
let Some(interner) = GLOBAL_INTERNER.get() else {
|
|
||||||
Err(SymError::Uninitialized)?
|
|
||||||
};
|
|
||||||
let Ok(interner) = interner.write() else {
|
|
||||||
Err(SymError::Poisoned)?
|
|
||||||
};
|
|
||||||
match interner.get_str(value) {
|
|
||||||
None => Err(SymError::Unseen(value)),
|
|
||||||
Some(string) => Ok(string.into()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Describes an error in [Sym] to [String] lookup
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
pub enum SymError {
|
|
||||||
Uninitialized,
|
|
||||||
Poisoned,
|
|
||||||
Unseen(Sym),
|
|
||||||
}
|
|
||||||
impl std::error::Error for SymError {}
|
|
||||||
impl Display for SymError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
SymError::Uninitialized => "String pool was not initialized".fmt(f),
|
|
||||||
SymError::Poisoned => "String pool was held by panicking thread".fmt(f),
|
|
||||||
SymError::Unseen(sym) => {
|
|
||||||
write!(f, "Symbol {sym:?} not present in String pool")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests;
|
|
@ -1,34 +0,0 @@
|
|||||||
//! Tests for the global intern pool
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
/// Distinct strings intern to distinct symbols; re-interning a string yields its original symbol.
#[test]
fn globalsym_from_returns_unique_value_for_unique_keys() {
    let (first, second) = (Sym::from("foo_bar"), Sym::from("foo_baz"));

    assert_ne!(first, second);

    // Interning an already-known key must hand back the same symbol.
    assert_eq!(first, Sym::from("foo_bar"));
    assert_eq!(second, Sym::from("foo_baz"));
}
|
|
||||||
/// Looking up a string that was never interned must not produce a symbol.
#[test]
fn try_from_str_returns_none_before_init() {
    match Sym::try_from_str("") {
        Some(value) => panic!("{value}"),
        None => {}
    }
}
|
|
||||||
/// Once a string has been interned, `try_from_str` must find it.
#[test]
fn try_from_str_returns_some_when_key_exists() {
    let _ = Sym::from("foo_bar");
    // The leftover `dbg!` was removed; on failure the assert itself names the expression.
    assert!(Sym::try_from_str("foo_bar").is_some());
}
|
|
||||||
|
|
||||||
/// The non-interning lookup must agree with the symbol handed out when interning.
#[test]
fn try_from_str_returns_the_same_thing_as_globalsym_from() {
    let interned = Sym::from("foo_bar");
    let looked_up = Sym::try_from_str("foo_bar");
    assert_eq!(Some(interned), looked_up);
}
|
|
||||||
|
|
||||||
/// `Sym::map` must pass the interned text to the closure and return its result.
#[test]
fn map_works() {
    let symbol = Sym::from("foo_bar");
    let matched = symbol.map(|text| "foo_bar" == text);
    assert!(matched.unwrap());
}
|
|
@ -1,49 +0,0 @@
|
|||||||
//! A string interner with deduplication
|
|
||||||
|
|
||||||
use super::{string_arena::StringArena, symbol::Symbol};
|
|
||||||
use hashbrown::hash_table::HashTable;
|
|
||||||
use std::hash::{BuildHasher, RandomState};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Interner<Sym: Symbol, H: BuildHasher = RandomState> {
|
|
||||||
set: HashTable<Sym>,
|
|
||||||
arena: StringArena<Sym>,
|
|
||||||
hasher: H,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<Sym: Symbol, H: BuildHasher + Default> Default for Interner<Sym, H> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self { set: Default::default(), arena: Default::default(), hasher: Default::default() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<Sym: Symbol, H: BuildHasher> Interner<Sym, H> {
|
|
||||||
pub fn get_or_insert(&mut self, s: &str) -> Sym {
|
|
||||||
let Self { set: map, arena, hasher } = self;
|
|
||||||
let hash = hasher.hash_one(s);
|
|
||||||
*map.entry(hash, is_match(s, arena), |t| {
|
|
||||||
hasher.hash_one(arena.get(*t).unwrap())
|
|
||||||
})
|
|
||||||
.or_insert_with(|| arena.push_string(s))
|
|
||||||
.get()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self, s: &str) -> Option<Sym> {
|
|
||||||
let Self { set: map, arena, hasher } = self;
|
|
||||||
map.find(hasher.hash_one(s), is_match(s, arena)).copied()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_str(&self, sym: Sym) -> Option<&str> {
|
|
||||||
self.arena.get(sym)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_match<'a, Sym: Symbol>(
|
|
||||||
s: &'a str,
|
|
||||||
arena: &'a StringArena<Sym>,
|
|
||||||
) -> impl Fn(&Sym) -> bool + 'a {
|
|
||||||
move |sym| match arena.get(*sym) {
|
|
||||||
Some(sym) => sym == s,
|
|
||||||
None => false,
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,48 +0,0 @@
|
|||||||
//! Compactly stores a set of immutable strings, producing a [Symbol] for each one
|
|
||||||
use super::symbol::Symbol;
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
/// Compactly stores a set of immutable strings, producing a [Symbol] for each one
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct StringArena<T: Symbol> {
|
|
||||||
ends: Vec<usize>,
|
|
||||||
buf: String,
|
|
||||||
_t: PhantomData<fn(T)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Symbol> StringArena<T> {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Default::default()
|
|
||||||
}
|
|
||||||
/// # May panic
|
|
||||||
/// Panics if Symbol::from_usize would panic
|
|
||||||
fn next_key(&self) -> T {
|
|
||||||
Symbol::from_usize(self.ends.len())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_span(&self, key: T) -> Option<(usize, usize)> {
|
|
||||||
let key = key.into_usize();
|
|
||||||
Some((*self.ends.get(key - 1)?, *self.ends.get(key)?))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self, key: T) -> Option<&str> {
|
|
||||||
let (start, end) = self.get_span(key)?;
|
|
||||||
// Safety: start and end offsets were created by push_string
|
|
||||||
Some(unsafe { self.buf.get_unchecked(start..end) })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn push_string(&mut self, s: &str) -> T {
|
|
||||||
if self.ends.is_empty() {
|
|
||||||
self.ends.push(self.buf.len())
|
|
||||||
}
|
|
||||||
let key = self.next_key();
|
|
||||||
self.buf.push_str(s);
|
|
||||||
self.ends.push(self.buf.len());
|
|
||||||
key
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Symbol> Default for StringArena<T> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self { ends: Default::default(), buf: Default::default(), _t: PhantomData }
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,35 +0,0 @@
|
|||||||
use std::{fmt, hash, num::*};
|
|
||||||
|
|
||||||
/// A compact key type that can be converted to and from a [`usize`] index
pub trait Symbol: Copy + fmt::Debug + fmt::Display + Eq + hash::Hash {
    /// The largest [`usize`] that may be stored in the [Symbol]
    const MAX: usize;

    /// Returns [`Some(Self)`](Some) if `value` is in range 0..=[Symbol::MAX]
    fn try_from_usize(value: usize) -> Option<Self>;

    /// Converts `value` into a [Symbol], panicking if it does not fit.
    ///
    /// # Panics
    /// Panics if [Symbol::try_from_usize] returns [None] for `value`
    fn from_usize(value: usize) -> Self {
        Self::try_from_usize(value).expect("should be within MIN and MAX")
    }

    /// Returns the [`usize`] this [Symbol] was created from
    fn into_usize(self) -> usize;
}

#[rustfmt::skip]
impl Symbol for usize {
    const MAX: usize = usize::MAX;
    fn try_from_usize(value: usize) -> Option<Self> { Some(value) }
    fn into_usize(self) -> usize { self }
}

/// Implements [Symbol] for a `NonZero*` integer by storing `value + 1`,
/// so that index 0 is representable.
macro_rules! impl_symbol_for_nonzero{($($int:ident: $nonzero:ident),* $(,)?) => {$(
    impl Symbol for $nonzero {
        const MAX: usize = $int::MAX as usize - 1;

        fn try_from_usize(value: usize) -> Option<Self> {
            // Check the range before narrowing: a bare `as` cast truncates, which
            // would silently map an out-of-range value onto an unrelated symbol.
            // The trait constant is named explicitly because `Self::MAX` would
            // resolve to the integer type's inherent `MAX`.
            if value > <Self as Symbol>::MAX {
                return None;
            }
            // `value + 1` cannot exceed `$int::MAX` here, so the cast is lossless.
            $nonzero::new((value + 1) as $int)
        }

        fn into_usize(self) -> usize {
            self.get() as usize - 1
        }
    }
)*}}

impl_symbol_for_nonzero!(
    u8: NonZeroU8,
    u16: NonZeroU16,
    u32: NonZeroU32,
    u64: NonZeroU64,
    usize: NonZeroUsize,
);
|
|
Loading…
Reference in New Issue
Block a user