cl-structures: Use hashbrown's hash table implementation for deduplication.

This commit is contained in:
John 2024-04-24 17:43:02 -05:00
parent e70ffd1895
commit be604b7b45
3 changed files with 26 additions and 37 deletions

View File

@@ -8,3 +8,4 @@ license.workspace = true
publish.workspace = true publish.workspace = true
[dependencies] [dependencies]
hashbrown = { version = "0.14.3", default-features = false }

View File

@@ -1,55 +1,49 @@
//! A string interner with deduplication //! A string interner with deduplication
use super::{symbol::Symbol, StringArena}; use super::{symbol::Symbol, StringArena};
use std::{ use hashbrown::hash_table::HashTable;
collections::{hash_map::RawEntryMut, HashMap}, use std::hash::{BuildHasher, RandomState};
hash::{BuildHasher, RandomState},
};
#[derive(Debug)] #[derive(Debug)]
pub struct Interner<T: Symbol, H: BuildHasher = RandomState> { pub struct Interner<Sym: Symbol, H: BuildHasher = RandomState> {
map: HashMap<T, ()>, set: HashTable<Sym>,
arena: StringArena<T>, arena: StringArena<Sym>,
hasher: H, hasher: H,
} }
impl<T: Symbol, H: BuildHasher + Default> Default for Interner<T, H> { impl<Sym: Symbol, H: BuildHasher + Default> Default for Interner<Sym, H> {
fn default() -> Self { fn default() -> Self {
Self { map: Default::default(), arena: Default::default(), hasher: Default::default() } Self { set: Default::default(), arena: Default::default(), hasher: Default::default() }
} }
} }
impl<T: Symbol, H: BuildHasher> Interner<T, H> { impl<Sym: Symbol, H: BuildHasher> Interner<Sym, H> {
pub fn get_or_insert(&mut self, s: &str) -> T { pub fn get_or_insert(&mut self, s: &str) -> Sym {
let Self { map, arena, hasher } = self; let Self { set: map, arena, hasher } = self;
let hash = hasher.hash_one(s); let hash = hasher.hash_one(s);
match map.raw_entry_mut().from_hash(hash, is_match(s, arena)) { *map.entry(hash, is_match(s, arena), |t| {
RawEntryMut::Occupied(entry) => *entry.into_key(), hasher.hash_one(arena.get(*t).unwrap())
RawEntryMut::Vacant(entry) => { })
let tok = arena.push_string(s); .or_insert_with(|| arena.push_string(s))
*(entry.insert_hashed_nocheck(hash, tok, ()).0) .get()
}
}
} }
pub fn get(&self, s: &str) -> Option<T> { pub fn get(&self, s: &str) -> Option<Sym> {
let Self { map, arena, hasher } = self; let Self { set: map, arena, hasher } = self;
map.raw_entry() map.find(hasher.hash_one(s), is_match(s, arena)).copied()
.from_hash(hasher.hash_one(s), is_match(s, arena))
.map(|entry| *entry.0)
} }
pub fn get_str(&self, sym: T) -> Option<&str> { pub fn get_str(&self, sym: Sym) -> Option<&str> {
self.arena.get(sym) self.arena.get(sym)
} }
} }
fn is_match<'a, T: Symbol>( fn is_match<'a, Sym: Symbol>(
target: &'a str, s: &'a str,
arena: &'a StringArena<T>, arena: &'a StringArena<Sym>,
) -> impl Fn(&T) -> bool + 'a { ) -> impl Fn(&Sym) -> bool + 'a {
move |sym| match arena.get(*sym) { move |sym| match arena.get(*sym) {
Some(sym) => sym == target, Some(sym) => sym == s,
None => false, None => false,
} }
} }

View File

@@ -2,13 +2,7 @@
//! - [Span](struct@span::Span): Stores a start and end [Loc](struct@span::Loc) //! - [Span](struct@span::Span): Stores a start and end [Loc](struct@span::Loc)
//! - [Loc](struct@span::Loc): Stores the index in a stream //! - [Loc](struct@span::Loc): Stores the index in a stream
#![warn(clippy::all)] #![warn(clippy::all)]
#![feature( #![feature(inline_const, dropck_eyepatch, decl_macro, get_many_mut)]
inline_const,
dropck_eyepatch,
decl_macro,
get_many_mut,
hash_raw_entry
)]
#![deny(unsafe_op_in_unsafe_fn)] #![deny(unsafe_op_in_unsafe_fn)]
pub mod arena; pub mod arena;