cl-typeck: Move type inference utils into own module

This commit is contained in:
John 2024-05-16 15:06:59 -05:00
parent 33b7cd3971
commit be81221895
2 changed files with 267 additions and 239 deletions

View File

@ -0,0 +1,248 @@
//! Implements type unification, used by the Hindley-Milner type inference algorithm
//!
//! Inspired by [rust-hindley-milner][1] and [hindley-milner-python][2]
//!
//! [1]: https://github.com/tcr/rust-hindley-milner/
//! [2]: https://github.com/rob-smallshire/hindley-milner-python
use cl_ast::Sym;
use core::fmt;
use std::{cell::RefCell, rc::Rc};
/*
Types in Conlang:
- Never type: !
- type !
- for<A> ! -> A
- Primitive types: bool, i32, (), ...
- type bool; ...
- Reference types: &T, *T
- for<T> type ref<T>; for<T> type ptr<T>
- Slice type: [T]
- for<T> type slice<T>
- Array type: [T;usize]
- for<T> type array<T, instanceof<usize>>
- Tuple type: (T, ...Z)
- for<T, ..> type tuple<T, ..> // on a per-case basis!
- Funct type: fn Tuple -> R
- for<T, R> type T -> R // on a per-case basis!
*/
/// A refcounted [Type]
pub type RcType = Rc<Type>;
#[derive(Debug, PartialEq, Eq)]
pub struct Variable {
pub instance: RefCell<Option<RcType>>,
}
#[derive(Debug, PartialEq, Eq)]
pub struct Operator {
name: Sym,
types: RefCell<Vec<RcType>>,
}
/// A [Type::Variable] or [Type::Operator]:
/// - A [Type::Variable] can be either bound or unbound (instance: Some(_) | None)
/// - A [Type::Operator] has a name (used to identify the operator) and a list of types.
///
/// A type which contains unbound variables is considered "generic" (see
/// [`Type::is_generic()`]).
#[derive(Debug, PartialEq, Eq)]
pub enum Type {
Variable(Variable),
Operator(Operator),
}
impl Type {
/// Creates a new unbound [type variable](Type::Variable)
pub fn new_var() -> RcType {
Rc::new(Self::Variable(Variable { instance: RefCell::new(None) }))
}
/// Creates a variable that is a new instance of another [Type]
pub fn new_inst(of: &RcType) -> RcType {
Rc::new(Self::Variable(Variable {
instance: RefCell::new(Some(of.clone())),
}))
}
/// Creates a new [type operator](Type::Operator)
pub fn new_op(name: Sym, types: &[RcType]) -> RcType {
Rc::new(Self::Operator(Operator {
name,
types: RefCell::new(types.to_vec()),
}))
}
/// Creates a new [type operator](Type::Operator) representing a lambda
pub fn new_fn(takes: &RcType, returns: &RcType) -> RcType {
Self::new_op("fn".into(), &[takes.clone(), returns.clone()])
}
/// Creates a new [type operator](Type::Operator) representing a primitive type
pub fn new_prim(name: Sym) -> RcType {
Self::new_op(name, &[])
}
/// Creates a new [type operator](Type::Operator) representing a tuple
pub fn new_tuple(members: &[RcType]) -> RcType {
Self::new_op("tuple".into(), members)
}
/// Sets this type variable to be an instance `of` the other
/// # Panics
/// Panics if `self` is not a type variable
pub fn set_instance(self: &RcType, of: &RcType) {
match self.as_ref() {
Type::Operator(_) => unimplemented!("Cannot set instance of a type operator"),
Type::Variable(Variable { instance }) => *instance.borrow_mut() = Some(of.clone()),
}
}
/// Checks whether there are any unbound type variables in this type.
/// ```rust
/// # use cl_typeck::inference::*;
/// let bool = Type::new_op("bool".into(), &[]);
/// let true_v = Type::new_inst(&bool);
/// let unbound = Type::new_var();
/// let id_fun = Type::new_fn(&unbound, &unbound);
/// let truthy = Type::new_fn(&unbound, &bool);
/// assert!(!bool.is_generic()); // bool contains no unbound type variables
/// assert!(!true_v.is_generic()); // true_v is bound to `bool`
/// assert!(unbound.is_generic()); // unbound is an unbound type variable
/// assert!(id_fun.is_generic()); // id_fun is a function with unbound type variables
/// assert!(truthy.is_generic()); // truthy is a function with one unbound type variable
/// ```
pub fn is_generic(self: &RcType) -> bool {
match self.as_ref() {
Type::Variable(Variable { instance }) => match instance.borrow().as_ref() {
// base case: self is an unbound type variable (instance is none)
None => true,
// Variable is bound to a type which may be generic
Some(instance) => instance.is_generic(),
},
Type::Operator(Operator { types, .. }) => {
// Operator may have generic args
types.borrow().iter().any(Self::is_generic)
}
}
}
/// Makes a deep copy of a type expression.
///
/// Bound variables are shared, unbound variables are duplicated.
pub fn deep_clone(self: &RcType) -> RcType {
// If there aren't any unbound variables, it's fine to clone the entire expression
if !self.is_generic() {
return self.clone();
}
// There are unbound type variables, so we make a new one
match self.as_ref() {
Type::Variable { .. } => Self::new_var(),
Type::Operator(Operator { name, types }) => Self::new_op(
*name,
&types
.borrow()
.iter()
.map(Self::deep_clone)
.collect::<Vec<_>>(),
),
}
}
/// Returns the defining instance of `self`,
/// collapsing type instances along the way.
/// # May panic
/// Panics if this type variable's instance field is already borrowed.
/// # Examples
/// ```rust
/// # use cl_typeck::inference::*;
/// let t_bool = Type::new_op("bool".into(), &[]);
/// let t_nest = Type::new_inst(&Type::new_inst(&Type::new_inst(&t_bool)));
/// let pruned = t_nest.prune();
/// assert_eq!(pruned, t_bool);
/// assert_eq!(t_nest, Type::new_inst(&t_bool));
/// ```
pub fn prune(self: &RcType) -> RcType {
if let Type::Variable(Variable { instance }) = self.as_ref() {
if let Some(old_inst) = instance.borrow_mut().as_mut() {
let new_inst = old_inst.prune(); // get defining instance
*old_inst = new_inst.clone(); // collapse
return new_inst;
}
}
self.clone()
}
/// Checks whether a type expression occurs in another type expression
///
/// # Note:
/// - Since the test uses strict equality, `self` should be pruned prior to testing.
/// - The test is *not guaranteed to terminate* for recursive types.
pub fn occurs_in(self: &RcType, other: &RcType) -> bool {
if self == other {
return true;
}
match other.as_ref() {
Type::Variable(Variable { instance }) => match instance.borrow().as_ref() {
Some(t) => self.occurs_in(t),
None => false,
},
Type::Operator(Operator { types, .. }) => {
// Note: this might panic.
// Think about whether it panics for only recursive types?
types.borrow().iter().any(|other| self.occurs_in(other))
}
}
}
/// Unifies two type expressions, propagating changes via interior mutability
pub fn unify(self: &RcType, other: &RcType) -> Result<(), InferenceError> {
let (a, b) = (self.prune(), other.prune()); // trim the hedges
match (a.as_ref(), b.as_ref()) {
(Type::Variable { .. }, _) if !a.occurs_in(&b) => a.set_instance(&b),
(Type::Variable { .. }, _) => Err(InferenceError::Recursive(a, b))?,
(Type::Operator { .. }, Type::Variable { .. }) => b.unify(&a)?,
(
Type::Operator(Operator { name: a_name, types: a_types }),
Type::Operator(Operator { name: b_name, types: b_types }),
) => {
let (a_types, b_types) = (a_types.borrow(), b_types.borrow());
if a_name != b_name || a_types.len() != b_types.len() {
Err(InferenceError::Mismatch(a.clone(), b.clone()))?
}
for (a, b) in a_types.iter().zip(b_types.iter()) {
a.unify(b)?
}
}
}
Ok(())
}
}
impl fmt::Display for Type {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Type::Variable(Variable { instance }) => match instance.borrow().as_ref() {
Some(instance) => write!(f, "{instance}"),
None => write!(f, "_"),
},
Type::Operator(Operator { name, types }) => {
write!(f, "({name}")?;
for ty in types.borrow().iter() {
write!(f, " {ty}")?;
}
f.write_str(")")
}
}
}
}
/// An error produced during type inference
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InferenceError {
Mismatch(RcType, RcType),
Recursive(RcType, RcType),
}
impl fmt::Display for InferenceError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InferenceError::Mismatch(a, b) => write!(f, "Type mismatch: {a:?} != {b:?}"),
InferenceError::Recursive(_, _) => write!(f, "Recursive type!"),
}
}
}

View File

@ -49,6 +49,24 @@
//! 4. TODO: Construct a typed AST for expressions, and type-check them
#![warn(clippy::all)]
/*
How do I flesh out modules in an incremental way?
1. Visit all *modules* and create nodes in a module tree for them
- This can be done by holding mutable references to submodules!
Module:
values: Map(name -> Value),
types: Map(name -> Type),
Value: Either
function: { signature: Type, body: FunctionBody }
static: { Mutability, ty: Type, init: ConstEvaluationResult }
const: { ty: Type, init: ConstEvaluationResult }
*/
pub mod key;
pub mod node;
@ -67,245 +85,7 @@ pub mod use_importer;
pub mod type_resolver;
pub mod inference {
//! Implements type unification, used by the Hindley-Milner type inference algorithm
//!
//! Inspired by [rust-hindley-milner][1] and [hindley-milner-python][2]
//!
//! [1]: https://github.com/tcr/rust-hindley-milner/
//! [2]: https://github.com/rob-smallshire/hindley-milner-python
use cl_ast::Sym;
use core::fmt;
use std::{cell::RefCell, rc::Rc};
/*
Types in Conlang:
- Never type: !
- type !
- for<A> ! -> A
- Primitive types: bool, i32, (), ...
- type bool; ...
- Reference types: &T, *T
- for<T> type ref<T>; for<T> type ptr<T>
- Slice type: [T]
- for<T> type slice<T>
- Array type: [T;usize]
- for<T> type array<T, instanceof<usize>>
- Tuple type: (T, ...Z)
- for<T, ..> type tuple<T, ..> // on a per-case basis!
- Funct type: fn Tuple -> R
- for<T, R> type T -> R // on a per-case basis!
*/
/// A refcounted [Type]
pub type RcType = Rc<Type>;
/// A [Type::Variable] or [Type::Operator]:
/// - A [Type::Variable] can be either bound or unbound (instance: Some(_) | None)
/// - A [Type::Operator] has a name (used to identify the operator) and a list of types.
///
/// A type which contains unbound variables is considered "generic" (see
/// [`Type::is_generic()`]).
#[derive(Debug, PartialEq, Eq)]
pub enum Type {
Variable {
instance: RefCell<Option<RcType>>,
},
Operator {
name: Sym,
types: RefCell<Vec<RcType>>,
},
}
impl Type {
/// Creates a new unbound [type variable](Type::Variable)
pub fn new_var() -> RcType {
Rc::new(Self::Variable { instance: RefCell::new(None) })
}
/// Creates a variable that is a new instance of another [Type]
pub fn new_inst(of: &RcType) -> RcType {
Rc::new(Self::Variable { instance: RefCell::new(Some(of.clone())) })
}
/// Creates a new [type operator](Type::Operator)
pub fn new_op(name: Sym, types: &[RcType]) -> RcType {
Rc::new(Self::Operator { name, types: RefCell::new(types.to_vec()) })
}
/// Creates a new [type operator](Type::Operator) representing a lambda
pub fn new_fn(takes: &RcType, returns: &RcType) -> RcType {
Self::new_op("fn".into(), &[takes.clone(), returns.clone()])
}
/// Creates a new [type operator](Type::Operator) representing a primitive type
pub fn new_prim(name: Sym) -> RcType {
Self::new_op(name, &[])
}
/// Creates a new [type operator](Type::Operator) representing a tuple
pub fn new_tuple(members: &[RcType]) -> RcType {
Self::new_op("tuple".into(), members)
}
/// Sets this type variable to be an instance `of` the other
/// # Panics
/// Panics if `self` is not a type variable
pub fn set_instance(self: &RcType, of: &RcType) {
match self.as_ref() {
Type::Operator { .. } => unimplemented!("Cannot set instance of a type operator"),
Type::Variable { instance } => *instance.borrow_mut() = Some(of.clone()),
}
}
/// Checks whether there are any unbound type variables in this type.
/// ```rust
/// # use cl_typeck::inference::*;
/// let bool = Type::new_op("bool".into(), &[]);
/// let true_v = Type::new_inst(&bool);
/// let unbound = Type::new_var();
/// let id_fun = Type::new_fn(&unbound, &unbound);
/// let truthy = Type::new_fn(&unbound, &bool);
/// assert!(!bool.is_generic()); // bool contains no unbound type variables
/// assert!(!true_v.is_generic()); // true_v is bound to `bool`
/// assert!(unbound.is_generic()); // unbound is an unbound type variable
/// assert!(id_fun.is_generic()); // id_fun is a function with unbound type variables
/// assert!(truthy.is_generic()); // truthy is a function with one unbound type variable
/// ```
pub fn is_generic(self: &RcType) -> bool {
match self.as_ref() {
Type::Variable { instance } => match instance.borrow().as_ref() {
// base case: self is an unbound type variable (instance is none)
None => true,
// Variable is bound to a type which may be generic
Some(instance) => instance.is_generic(),
},
Type::Operator { types, .. } => {
// Operator may have generic args
types.borrow().iter().any(Self::is_generic)
}
}
}
/// Makes a deep copy of a type expression.
///
/// Bound variables are shared, unbound variables are duplicated.
pub fn deep_clone(self: &RcType) -> RcType {
// If there aren't any unbound variables, it's fine to clone the entire expression
if !self.is_generic() {
return self.clone();
}
// There are unbound type variables, so we make a new one
match self.as_ref() {
Type::Variable { .. } => Self::new_var(),
Type::Operator { name, types } => Self::new_op(
*name,
&types
.borrow()
.iter()
.map(Self::deep_clone)
.collect::<Vec<_>>(),
),
}
}
/// Returns the defining instance of `self`,
/// collapsing type instances along the way.
/// # May panic
/// Panics if this type variable's instance field is already borrowed.
/// # Examples
/// ```rust
/// # use cl_typeck::inference::*;
/// let t_bool = Type::new_op("bool".into(), &[]);
/// let t_nest = Type::new_inst(&Type::new_inst(&Type::new_inst(&t_bool)));
/// let pruned = t_nest.prune();
/// assert_eq!(pruned, t_bool);
/// assert_eq!(t_nest, Type::new_inst(&t_bool));
/// ```
pub fn prune(self: &RcType) -> RcType {
if let Type::Variable { instance } = self.as_ref() {
if let Some(old_inst) = instance.borrow_mut().as_mut() {
let new_inst = old_inst.prune(); // get defining instance
*old_inst = new_inst.clone(); // collapse
return new_inst;
}
}
self.clone()
}
/// Checks whether a type expression occurs in another type expression
///
/// # Note:
/// - Since the test uses strict equality, `self` should be pruned prior to testing.
/// - The test is *not guaranteed to terminate* for recursive types.
pub fn occurs_in(self: &RcType, other: &RcType) -> bool {
if self == other {
return true;
}
match other.as_ref() {
Type::Variable { instance } => match instance.borrow().as_ref() {
Some(t) => self.occurs_in(t),
None => false,
},
Type::Operator { types, .. } => {
// Note: this might panic.
// Think about whether it panics for only recursive types?
types.borrow().iter().any(|other| self.occurs_in(other))
}
}
}
/// Unifies two type expressions, propagating changes via interior mutability
pub fn unify(self: &RcType, other: &RcType) -> Result<(), InferenceError> {
let (a, b) = (self.prune(), other.prune()); // trim the hedges
match (a.as_ref(), b.as_ref()) {
(Type::Variable { .. }, _) if !a.occurs_in(&b) => a.set_instance(&b),
(Type::Variable { .. }, _) => Err(InferenceError::Recursive(a, b))?,
(Type::Operator { .. }, Type::Variable { .. }) => b.unify(&a)?,
(
Type::Operator { name: a_name, types: a_types },
Type::Operator { name: b_name, types: b_types },
) => {
let (a_types, b_types) = (a_types.borrow(), b_types.borrow());
if a_name != b_name || a_types.len() != b_types.len() {
Err(InferenceError::Mismatch(a.clone(), b.clone()))?
}
for (a, b) in a_types.iter().zip(b_types.iter()) {
a.unify(b)?
}
}
}
Ok(())
}
}
impl fmt::Display for Type {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Type::Variable { instance } => match instance.borrow().as_ref() {
Some(instance) => write!(f, "{instance}"),
None => write!(f, "_"),
},
Type::Operator { name, types } => {
write!(f, "({name}")?;
for ty in types.borrow().iter() {
write!(f, " {ty}")?;
}
f.write_str(")")
}
}
}
}
/// An error produced during type inference
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InferenceError {
Mismatch(RcType, RcType),
Recursive(RcType, RcType),
}
impl fmt::Display for InferenceError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InferenceError::Mismatch(a, b) => write!(f, "Type mismatch: {a:?} != {b:?}"),
InferenceError::Recursive(_, _) => write!(f, "Recursive type!"),
}
}
}
}
pub mod inference;
/*