commit 24de507825db750c70658a74f0f4cd1dc2d20bbe Author: John Date: Thu Jan 16 20:18:15 2025 -0600 cl-arena: Break into its own repository, for reuse elsewhere diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0b15446 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ + +# Visual Studio Code config +.vscode + +# Rust +**/Cargo.lock +target + +# Pest files generated by Grammatical +*.p*st diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e5bf963 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "cl-arena" +repository = "https://git.soft.fish/j/Conlang" +version = "0.0.7" +authors = ["John Breaux "] +edition = "2021" +license = "MIT" +publish = ["soft-fish"] + +[dependencies] diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..1b2f600 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,9 @@ + The MIT License (MIT) + +Copyright © John Breaux (soft.fish) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..faf7ef6 --- /dev/null +++ b/readme.md @@ -0,0 +1,61 @@ +# A general-purpose arena allocator for `std` Rust projects + +`cl-arena` implements a generic bump allocator without piecemeal deallocation. This means that references to elements are valid for the lifetime of the allocator. + +## Why would you want this? + +It can be difficult to create self-referential data structures in Rust without jumping through hoops to satisfy the borrow checker. Giving all of your nodes the same lifetime would dramatically simplify your graph's construction, as long as you can wrap your back edges in a [`cell`][cell] of some kind. + +Additionally, having an allocator that spits out references with an identical lifetime can facilitate the creation of [object interners][intern], which, due to their uniqueness characteristic, allow cheap referential comparison between objects. + +## How do I use these allocators? + +### TypedArena + +The `TypedArena` stores elements of a single type, but will [Drop][Drop] its elements when the arena goes out of scope. + +```rust +// Create a new TypedArena +let string_arena = TypedArena::new(); + +// Allocate some `String`s onto the arena. +// Note that allocating onto the arena gives you a *mutable* reference (which you can mutate) +let elem_one: &mut String = string_arena.alloc(String::from("Hello, world!")); +let elem_two: &mut String = string_arena.alloc(String::from("Goodbye, world!")); + +println!("{elem_one}\n{elem_two}"); + +// Drop the TypedArena, dropping its contents +drop(string_arena); // drops elem_one and elem_two +``` + +### DroplessArena + +The `DroplessArena` stores data of any type that doesn't implement [Drop][Drop]. It has helper methods for allocating slices [`[T]`][slice] and [`str`][str].
+ +```rust + // Create a new DroplessArena + let dropless_arena = DroplessArena::new(); + + // Allocate some data on the arena + // Note that alloc and alloc_slice return mutable references to the data... + let just1_i32: &mut i32 = dropless_arena.alloc(1); + let slice_i32: &mut [i32] = dropless_arena.alloc_slice(&[0, 2, 3, 4]); + + // ...and that alloc_str returns an *immutable* reference. + let slice_str: &str = dropless_arena.alloc_str("Hello, world!"); + + slice_i32[0] = *just1_i32; + + println!("just1_i32: {just1_i32:?}"); // just1_i32: 1 + println!("slice_i32: {slice_i32:?}"); // slice_i32: [1, 2, 3, 4] + println!("slice_str: {slice_str:?}"); // str_slice: "Hello, world!" +``` + + +[cell]: https://doc.rust-lang.org/core/cell/index.html +[Drop]: https://doc.rust-lang.org/core/ops/trait.Drop.html +[slice]: https://doc.rust-lang.org/core/primitive.slice.html +[str]: https://doc.rust-lang.org/core/primitive.str.html + +[intern]: https://en.wikipedia.org/wiki/String_interning diff --git a/src/chunk.rs b/src/chunk.rs new file mode 100644 index 0000000..60b905d --- /dev/null +++ b/src/chunk.rs @@ -0,0 +1,59 @@ +//! An [ArenaChunk] contains a block of raw memory for use in arena allocators. 
use alloc::boxed::Box;
use core::{
    mem::{self, MaybeUninit},
    ptr::{self, NonNull},
};

/// A contiguous block of raw storage holding `mem.len()` slots of `T`, of which
/// the first `filled` are initialized.
///
/// NOTE(review): every angle-bracketed generic argument was stripped from this
/// dump (`ArenaChunk {`, `NonNull<[MaybeUninit]>`, `size_of::()`); the `<T>`
/// parameters below are reconstructed to match upstream `rustc_arena`.
pub struct ArenaChunk<T> {
    /// Raw storage, leaked from a `Box` in [`ArenaChunk::new`] and reclaimed in [`Drop`].
    pub(crate) mem: NonNull<[MaybeUninit<T>]>,
    /// Number of initialized elements at the start of `mem`.
    pub(crate) filled: usize,
}

impl<T> ArenaChunk<T> {
    /// Allocates a chunk with capacity for `cap` elements of `T`.
    pub fn new(cap: usize) -> Self {
        let slice = Box::new_uninit_slice(cap);
        Self {
            mem: NonNull::from(Box::leak(slice)),
            filled: 0,
        }
    }

    /// Drops all elements inside self, and resets the filled count to 0
    ///
    /// # Safety
    ///
    /// The caller must ensure that `self.filled` elements of self are currently initialized
    pub unsafe fn drop_elements(&mut self) {
        if mem::needs_drop::<T>() {
            // Safety: the caller has ensured that `filled` elements are initialized
            unsafe {
                let slice = self.mem.as_mut();
                for t in slice[..self.filled].iter_mut() {
                    t.assume_init_drop();
                }
            }
            self.filled = 0;
        }
    }

    /// Gets a pointer to the start of the arena
    pub fn start(&mut self) -> *mut T {
        self.mem.as_ptr() as _
    }

    /// Gets a pointer to the end of the arena
    pub fn end(&mut self) -> *mut T {
        if mem::size_of::<T>() == 0 {
            ptr::without_provenance_mut(usize::MAX) // pointers to ZSTs must be unique
        } else {
            unsafe { self.start().add(self.mem.len()) }
        }
    }
}

impl<T> Drop for ArenaChunk<T> {
    fn drop(&mut self) {
        // Reconstitute the leaked Box so the backing memory is freed. Elements
        // are NOT dropped here; owning arenas call `drop_elements` first.
        let _ = unsafe { Box::from_raw(self.mem.as_ptr()) };
    }
}

// diff --git a/src/dropless_arena.rs b/src/dropless_arena.rs new file mode 100644
// index 0000000..d1852eb --- /dev/null +++ b/src/dropless_arena.rs @@ -0,0 +1,157 @@
// //! A [DroplessArena] can hold *any* combination of types as long as they don't implement
// //! [Drop].
+use crate::{chunk::ArenaChunk, constants::*}; +use alloc::vec::Vec; +use core::{ + alloc::Layout, + cell::{Cell, RefCell}, + marker::PhantomData, + mem, ptr, slice, +}; + +pub struct DroplessArena<'arena> { + _lives: PhantomData<&'arena u8>, + chunks: RefCell>>, + head: Cell<*mut u8>, + tail: Cell<*mut u8>, +} + +impl Default for DroplessArena<'_> { + fn default() -> Self { + Self::new() + } +} + +impl<'arena> DroplessArena<'arena> { + pub const fn new() -> Self { + Self { + _lives: PhantomData, + chunks: RefCell::new(Vec::new()), + head: Cell::new(ptr::null_mut()), + tail: Cell::new(ptr::null_mut()), + } + } + + /// Allocates a `T` in the [DroplessArena], and returns a mutable reference to it. + /// + /// # Panics + /// - Panics if T implements [Drop] + /// - Panics if T is zero-sized + #[allow(clippy::mut_from_ref)] + pub fn alloc(&'arena self, value: T) -> &'arena mut T { + assert!(!mem::needs_drop::()); + assert!(mem::size_of::() != 0); + + let out = self.alloc_raw(Layout::new::()) as *mut T; + + unsafe { + ptr::write(out, value); + &mut *out + } + } + + /// Allocates a slice of `T`s`, copied from the given slice, returning a mutable reference + /// to it. + /// + /// # Panics + /// - Panics if T implements [Drop] + /// - Panics if T is zero-sized + /// - Panics if the slice is empty + #[allow(clippy::mut_from_ref)] + pub fn alloc_slice(&'arena self, slice: &[T]) -> &'arena mut [T] { + assert!(!mem::needs_drop::()); + assert!(mem::size_of::() != 0); + assert!(!slice.is_empty()); + + let mem = self.alloc_raw(Layout::for_value::<[T]>(slice)) as *mut T; + + unsafe { + mem.copy_from_nonoverlapping(slice.as_ptr(), slice.len()); + slice::from_raw_parts_mut(mem, slice.len()) + } + } + + /// Allocates a copy of the given [`&str`](str), returning a reference to the allocation. + /// + /// # Panics + /// Panics if the string is empty. 
+ pub fn alloc_str(&'arena self, string: &str) -> &'arena str { + let slice = self.alloc_slice(string.as_bytes()); + + // Safety: This is a clone of the input string, which was valid + unsafe { core::str::from_utf8_unchecked(slice) } + } + + /// Allocates some [bytes](u8) based on the given [Layout]. + /// + /// # Panics + /// Panics if the provided [Layout] has size 0 + pub fn alloc_raw(&'arena self, layout: Layout) -> *mut u8 { + /// Rounds the given size (or pointer value) *up* to the given alignment + fn align_up(size: usize, align: usize) -> usize { + (size + align - 1) & !(align - 1) + } + /// Rounds the given size (or pointer value) *down* to the given alignment + fn align_down(size: usize, align: usize) -> usize { + size & !(align - 1) + } + + assert!(layout.size() != 0); + loop { + let Self { head, tail, .. } = self; + let start = head.get().addr(); + let end = tail.get().addr(); + + let align = 8.max(layout.align()); + + let bytes = align_up(layout.size(), align); + + if let Some(end) = end.checked_sub(bytes) { + let end = align_down(end, layout.align()); + + if start <= end { + tail.set(tail.get().with_addr(end)); + return tail.get(); + } + } + + self.grow(layout.size()); + } + } + + /// Grows the allocator, doubling the chunk size until it reaches [MAX_CHUNK]. 
+ #[cold] + #[inline(never)] + fn grow(&self, len: usize) { + let mut chunks = self.chunks.borrow_mut(); + + let capacity = if let Some(last) = chunks.last_mut() { + last.mem.len().min(MAX_CHUNK / 2) * 2 + } else { + MIN_CHUNK + } + .max(len); + + let mut chunk = ArenaChunk::::new(capacity); + + self.head.set(chunk.start()); + self.tail.set(chunk.end()); + chunks.push(chunk); + } + + /// Checks whether the given slice is allocated in this arena + pub fn contains_slice(&self, slice: &[T]) -> bool { + let ptr = slice.as_ptr().cast::().cast_mut(); + for chunk in self.chunks.borrow_mut().iter_mut() { + if chunk.start() <= ptr && ptr <= chunk.end() { + return true; + } + } + false + } +} + +unsafe impl Send for DroplessArena<'_> {} + +#[cfg(test)] +mod tests; diff --git a/src/dropless_arena/tests.rs b/src/dropless_arena/tests.rs new file mode 100644 index 0000000..3e99ae7 --- /dev/null +++ b/src/dropless_arena/tests.rs @@ -0,0 +1,62 @@ +use super::DroplessArena; +extern crate std; +use core::alloc::Layout; +use std::{prelude::rust_2021::*, vec}; + +#[test] +fn readme_example() { + // Create a new DroplessArena + let dropless_arena = DroplessArena::new(); + + // Allocate some data on the arena + // Note that alloc and alloc_slice return mutable references to the data... + let just1_i32: &mut i32 = dropless_arena.alloc(1); + let slice_i32: &mut [i32] = dropless_arena.alloc_slice(&[0, 2, 3, 4]); + + // ...and that alloc_str returns an *immutable* reference. + let slice_str: &str = dropless_arena.alloc_str("Hello, world!"); + + slice_i32[0] = *just1_i32; + + std::println!("just1_i32: {just1_i32:?}"); // just1_i32: 1 + std::println!("slice_i32: {slice_i32:?}"); // slice_i32: [1, 2, 3, 4] + std::println!("slice_str: {slice_str:?}"); // str_slice: "Hello, world!" 
+} + +#[test] +fn alloc_raw() { + let arena = DroplessArena::new(); + let bytes = arena.alloc_raw(Layout::for_value(&0u128)); + let byte2 = arena.alloc_raw(Layout::for_value(&0u128)); + + assert_ne!(bytes, byte2); +} + +#[test] +fn alloc() { + let arena = DroplessArena::new(); + let mut allocations = vec![]; + for i in 0..0x400 { + allocations.push(arena.alloc(i)); + } +} + +#[test] +fn alloc_strings() { + const KW: &[&str] = &["pub", "mut", "fn", "mod", "conlang", "sidon", "🦈"]; + let arena = DroplessArena::new(); + let mut allocations = vec![]; + for _ in 0..100 { + for kw in KW { + allocations.push(arena.alloc_str(kw)); + } + } +} + +#[test] +#[should_panic] +fn alloc_zsts() { + struct Zst; + let arena = DroplessArena::new(); + arena.alloc(Zst); +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7a6d51c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,23 @@ +//! Typed and dropless arena allocation, paraphrased from [the Rust Compiler's `rustc_arena`](https://github.com/rust-lang/rust/blob/master/compiler/rustc_arena/src/lib.rs). See [LICENSE][1]. +//! +//! An Arena Allocator is a type of allocator which provides stable locations for allocations within +//! itself for the entire duration of its lifetime. +//! +//! [1]: https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT + +#![feature(dropck_eyepatch)] +#![no_std] + +extern crate alloc; + +pub(crate) mod constants { + //! Size constants for arena chunk growth + pub(crate) const MIN_CHUNK: usize = 512; + pub(crate) const MAX_CHUNK: usize = 2 * 1024 * 1024; +} + +mod chunk; + +pub mod typed_arena; + +pub mod dropless_arena; diff --git a/src/typed_arena.rs b/src/typed_arena.rs new file mode 100644 index 0000000..46534e4 --- /dev/null +++ b/src/typed_arena.rs @@ -0,0 +1,159 @@ +//! A [TypedArena] can hold many instances of a single type, and will properly [Drop] them. 
+#![allow(clippy::mut_from_ref)] + +use crate::{chunk::ArenaChunk, constants::*}; +use alloc::vec::Vec; +use core::{ + cell::{Cell, RefCell}, + marker::PhantomData, + mem, ptr, slice, +}; + +/// A [TypedArena] can hold many instances of a single type, and will properly [Drop] them when +/// it falls out of scope. +pub struct TypedArena<'arena, T> { + _lives: PhantomData<&'arena T>, + _drops: PhantomData, + chunks: RefCell>>, + head: Cell<*mut T>, + tail: Cell<*mut T>, +} + +impl Default for TypedArena<'_, T> { + fn default() -> Self { + Self::new() + } +} + +impl<'arena, T> TypedArena<'arena, T> { + pub const fn new() -> Self { + Self { + _lives: PhantomData, + _drops: PhantomData, + chunks: RefCell::new(Vec::new()), + head: Cell::new(ptr::null_mut()), + tail: Cell::new(ptr::null_mut()), + } + } + + pub fn alloc(&'arena self, value: T) -> &'arena mut T { + if self.head == self.tail { + self.grow(1); + } + + let out = if mem::size_of::() == 0 { + self.head + .set(ptr::without_provenance_mut(self.head.get().addr() + 1)); + ptr::NonNull::::dangling().as_ptr() + } else { + let out = self.head.get(); + self.head.set(unsafe { out.add(1) }); + out + }; + + unsafe { + ptr::write(out, value); + &mut *out + } + } + + fn can_allocate(&self, len: usize) -> bool { + len <= unsafe { self.tail.get().offset_from(self.head.get()) as usize } + } + + /// # Panics + /// Panics if size_of:: == 0 || len == 0 + #[inline] + fn alloc_raw_slice(&self, len: usize) -> *mut T { + assert!(mem::size_of::() != 0); + assert!(len != 0); + + if !self.can_allocate(len) { + self.grow(len) + } + + let out = self.head.get(); + + unsafe { self.head.set(out.add(len)) }; + out + } + + pub fn alloc_from_iter(&'arena self, iter: I) -> &'arena mut [T] + where + I: IntoIterator, + { + // Collect them all into a buffer so they're allocated contiguously + let mut buf = iter.into_iter().collect::>(); + if buf.is_empty() { + return &mut []; + } + + let len = buf.len(); + // If T is a ZST, calling alloc_raw_slice 
will panic + let slice = if mem::size_of::() == 0 { + self.head + .set(ptr::without_provenance_mut(self.head.get().addr() + len)); + ptr::NonNull::dangling().as_ptr() + } else { + self.alloc_raw_slice(len) + }; + + unsafe { + buf.as_ptr().copy_to_nonoverlapping(slice, len); + buf.set_len(0); + slice::from_raw_parts_mut(slice, len) + } + } + + #[cold] + #[inline(never)] + fn grow(&self, len: usize) { + let size = mem::size_of::().max(1); + + let mut chunks = self.chunks.borrow_mut(); + + let capacity = if let Some(last) = chunks.last_mut() { + last.filled = self.get_filled_of_chunk(last); + last.mem.len().min(MAX_CHUNK / size) * 2 + } else { + MIN_CHUNK / size + } + .max(len); + + let mut chunk = ArenaChunk::::new(capacity); + + self.head.set(chunk.start()); + self.tail.set(chunk.end()); + chunks.push(chunk); + } + + fn get_filled_of_chunk(&self, chunk: &mut ArenaChunk) -> usize { + let Self { head: tail, .. } = self; + let head = chunk.start(); + if mem::size_of::() == 0 { + tail.get().addr() - head.addr() + } else { + unsafe { tail.get().offset_from(head) as usize } + } + } +} + +unsafe impl Send for TypedArena<'_, T> {} + +unsafe impl<#[may_dangle] T> Drop for TypedArena<'_, T> { + fn drop(&mut self) { + let mut chunks = self.chunks.borrow_mut(); + + if let Some(last) = chunks.last_mut() { + last.filled = self.get_filled_of_chunk(last); + self.tail.set(self.head.get()); + } + + for chunk in chunks.iter_mut() { + unsafe { chunk.drop_elements() } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/src/typed_arena/tests.rs b/src/typed_arena/tests.rs new file mode 100644 index 0000000..8dae706 --- /dev/null +++ b/src/typed_arena/tests.rs @@ -0,0 +1,78 @@ +use super::TypedArena; +extern crate std; +use std::{prelude::rust_2021::*, print, vec}; + +#[test] +fn readme_example() { + // Create a new TypedArena + let string_arena = TypedArena::new(); + + // Allocate some `String`s onto the arena. 
+ // Note that allocating onto the arena gives you a *mutable* reference (which you can ) + let elem_one: &mut String = string_arena.alloc(String::from("Hello, world!")); + let elem_two: &mut String = string_arena.alloc(String::from("Goodbye, world!")); + + std::println!("{elem_one}\n{elem_two}"); + + // Drop the TypedArena, dropping its contents + drop(string_arena); // drops elem_one and elem_two +} + +#[test] +fn pushing_to_arena() { + let arena = TypedArena::new(); + let foo = arena.alloc("foo"); + let bar = arena.alloc("bar"); + let baz = arena.alloc("baz"); + + assert_eq!("foo", *foo); + assert_eq!("bar", *bar); + assert_eq!("baz", *baz); +} + +#[test] +fn pushing_vecs_to_arena() { + let arena = TypedArena::new(); + + let foo = arena.alloc(vec!["foo"]); + let bar = arena.alloc(vec!["bar"]); + let baz = arena.alloc(vec!["baz"]); + + assert_eq!("foo", foo[0]); + assert_eq!("bar", bar[0]); + assert_eq!("baz", baz[0]); +} + +#[test] +fn pushing_zsts() { + struct ZeroSized; + impl Drop for ZeroSized { + fn drop(&mut self) { + print!("") + } + } + + let arena = TypedArena::new(); + + for _ in 0..0x100 { + arena.alloc(ZeroSized); + } +} + +#[test] +fn pushing_nodrop_zsts() { + struct ZeroSized; + let arena = TypedArena::new(); + + for _ in 0..0x1000 { + arena.alloc(ZeroSized); + } +} +#[test] +fn resize() { + let arena = TypedArena::new(); + + for _ in 0..0x780 { + arena.alloc(0u128); + } +}