Skip to content

Commit

Permalink
Change function representation to be multi-ary (#2112)
Browse files Browse the repository at this point in the history
In the new AST, the representation function application has been changed
to be multi-ary by default (as is primop application as well). It just
matches the syntax better and it avoids having to desugar n-ary
application to nested unary-application, saving space and time. The only
cost is that unary application are slighty larger in memory (basically
one word for the size of the slice).

This commit applies the same treatment to function abstraction since the
same arguments apply. Passing by, we also specialize macros like `fun`
and `app` for one argument to avoid vec allocation.
  • Loading branch information
yannham authored Dec 6, 2024
1 parent 17686e0 commit eaf676c
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ warning: plain functions as contracts are deprecated
┌─ [INPUTS_PATH]/warnings/naked_contract_parametrized.ncl:6:3
3let C = fun arg label value => value in
---------------------------- this function
-------------------- this function
·
6 │ [1, 2] | Array (C "hi")
│ ^^^^^^ applied to this term
Expand All @@ -17,7 +17,7 @@ warning: plain functions as contracts are deprecated
┌─ [INPUTS_PATH]/warnings/naked_contract_parametrized.ncl:5:3
3 │ let C = fun arg label value => value in
---------------------------- this function
-------------------- this function
4 │ [
5 │ 1 | C "hi",
│ ^ applied to this term
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ warning: plain functions as contracts are deprecated
= wrap this function using one of the constructors in `std.contract` instead, like `std.contract.from_validator` or `std.contract.custom`

error: non serializable term
┌─ [INPUTS_PATH]/errors/non_serializable_print_path.ncl:8:30
┌─ [INPUTS_PATH]/errors/non_serializable_print_path.ncl:8:50
8 │ let SomeParametricContract = fun parameter label value => value
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^
= When exporting field `foo.bar.baz[3].inner.qux_miss_param`
= Nickel only supports serializing to and from strings, booleans, numbers, enum tags, `null` (depending on the format), as well as records and arrays of serializable values.
Expand Down
69 changes: 57 additions & 12 deletions core/src/bytecode/ast/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,21 @@ impl<'ast> Record<'ast> {
/// Multi-ary application for types implementing `Into<Ast>`.
#[macro_export]
macro_rules! app {
( $alloc:expr, $f:expr $(, $args:expr )+ $(,)?) => {
// We avoid a vec allocation for unary applications, which are relatively common.
( $alloc:expr, $f:expr , $arg:expr $(,)?) => {
$crate::bytecode::ast::Ast::from(
$alloc.app(
$crate::bytecode::ast::Ast::from($f),
std::iter::once($crate::bytecode::ast::Ast::from($arg))
)
)
};
( $alloc:expr, $f:expr, $arg1:expr $(, $args:expr )+ $(,)?) => {
{
let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
let args = vec![
$crate::bytecode::ast::Ast::from($arg1)
$(, $crate::bytecode::ast::Ast::from($args) )+
];

$crate::bytecode::ast::Ast::from($alloc.app($crate::bytecode::ast::Ast::from($f), args))
}
Expand All @@ -324,9 +336,21 @@ macro_rules! app {
#[macro_export]
/// Multi-ary application for types implementing `Into<RichTerm>`.
macro_rules! primop_app {
( $alloc: expr, $op:expr $(, $args:expr )+ $(,)?) => {
// We avoid a vec allocation for unary primop applications, which are relatively common.
( $alloc:expr, $op:expr , $arg:expr $(,)?) => {
$crate::bytecode::ast::Ast::from(
$alloc.prim_op(
$op,
std::iter::once($crate::bytecode::ast::Ast::from($arg))
)
)
};
( $alloc:expr, $op:expr, $arg1:expr $(, $args:expr )+ $(,)?) => {
{
let args = vec![$( $crate::bytecode::ast::Ast::from($args) ),+];
let args = vec![
$crate::bytecode::ast::Ast::from($arg1)
$(, $crate::bytecode::ast::Ast::from($args) )+
];
$crate::bytecode::ast::Ast::from($alloc.prim_op($op, args))
}
};
Expand All @@ -336,20 +360,41 @@ macro_rules! primop_app {
/// Multi argument function for types implementing `Into<Ident>` (for the identifiers), and
/// `Into<RichTerm>` for the body.
macro_rules! fun {
( $alloc: expr, $id:expr, $body:expr $(,)?) => {
// Auxiliary form for `fun!` where the arguments are clearly delimited syntactically by
// brackets. The issue with the general interface of `fun!` is that it must extract its last
// argument, the body of the function, with an arbitrary number of macro arguments before it.
// This isn't easy to do because the macro parser can't backtrack. The bracketed form uses
// recursion and a separate syntax for arguments to make it work.
//
// For example, calling `fun!(alloc, args=[], arg1, arg2, body)` will expand to `fun!(alloc,
// args=[arg1, arg2], body)`, which is the base case that can be turned into a function. The
// bracket part is used to accumulate the arguments before the body.
($alloc:expr, args=[ $( $args:expr, )+ ], $body:expr) => {
{
let args = vec![
$($crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($args)), )+
];

$crate::bytecode::ast::Ast::from(
$alloc.fun(args, $crate::bytecode::ast::Ast::from($body))
)
}
};
// Recursive case for the bracketed form.
($alloc:expr, args=[ $( $args:expr, )* ], $next_arg:expr $(, $rest:expr )+) => {
fun!($alloc, args=[ $( $args, )* $next_arg, ] $(, $rest )+)
};
// We avoid a vec allocation for unary functions, which are relatively common.
( $alloc:expr, $arg:expr, $body:expr $(,)?) => {
$crate::bytecode::ast::Ast::from(
$alloc.fun(
$crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($id)),
std::iter::once($crate::bytecode::ast::pattern::Pattern::any($crate::identifier::LocIdent::from($arg))),
$crate::bytecode::ast::Ast::from($body)
)
)
};
( $alloc:expr, $id1:expr, $id2:expr $(, $rest:expr )+ $(,)?) => {
fun!(
$alloc,
$id1,
fun!($alloc, $id2, $( $rest ),+)
)
( $alloc:expr, $arg1:expr, $arg2:expr $(, $rest:expr )+ $(,)?) => {
fun!($alloc, args=[ $arg1, $arg2, ] $(, $rest )+)
};
}

Expand Down
96 changes: 73 additions & 23 deletions core/src/bytecode/ast/compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,33 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
}
}))
}
Term::Fun(id, body) => alloc.fun(Pattern::any(*id), body.to_ast(alloc)),
Term::FunPattern(pat, body) => alloc.fun(pat.to_ast(alloc), body.to_ast(alloc)),
t @ (Term::Fun(_, body) | Term::FunPattern(_, body)) => {
let fst_arg = match t {
Term::Fun(id, _) => Pattern::any(*id),
Term::FunPattern(pat, _) => pat.to_ast(alloc),
// unreachable!(): we are in a match arm that matches either Fun or FunPattern
_ => unreachable!(),
};

let mut args = vec![fst_arg];
let mut maybe_next_fun = body;

let final_body = loop {
match maybe_next_fun.as_ref() {
Term::Fun(next_id, next_body) => {
args.push(Pattern::any(*next_id));
maybe_next_fun = next_body;
}
Term::FunPattern(next_pat, next_body) => {
args.push(next_pat.to_ast(alloc));
maybe_next_fun = next_body;
}
_ => break maybe_next_fun,
}
};

alloc.fun(args, final_body.to_ast(alloc))
}
Term::Let(bindings, body, attrs) => alloc.let_block(
bindings.iter().map(|(id, value)| LetBinding {
pattern: Pattern::any(*id),
Expand Down Expand Up @@ -327,7 +352,11 @@ impl<'ast> FromMainline<'ast, term::Term> for Node<'ast> {
maybe_next_app = next_fun.as_ref();
}

alloc.app(fun.to_ast(alloc), args.into_iter().rev())
// Application is left-associative: `f x y` is parsed as `(f x) y`. So we see the
// outer argument `y` first, and `args` will be `[y, x]`. We need to reverse it to
// match the expected order of a flat application node.
args.reverse();
alloc.app(fun.to_ast(alloc), args)
}
Term::Var(id) => Node::Var(*id),
Term::Enum(id) => alloc.enum_variant(*id, None),
Expand Down Expand Up @@ -1207,10 +1236,31 @@ impl<'ast> FromAst<Node<'ast>> for term::Term {

Term::StrChunks(chunks)
}
Node::Fun { arg, body } => match arg.data {
PatternData::Any(id) => Term::Fun(id, body.to_mainline()),
_ => Term::FunPattern((*arg).to_mainline(), body.to_mainline()),
},
Node::Fun { args, body } => {
let body_pos = body.pos;

// We transform a n-ary function representation to a chain of nested unary
// functions, the latter being the representation used in the mainline AST.
args.iter()
.rev()
.fold(term::RichTerm::from_ast(body), |acc, arg| {
let term = match arg.data {
PatternData::Any(id) => Term::Fun(id, acc),
_ => term::Term::FunPattern((*arg).to_mainline(), acc),
};

// [^nary-constructors-unrolling]: this case is a bit annoying: we need to
// extract the position of the intermediate created functions to satisfy
// the old AST structure, but this information isn't available directly.
//
// What we do here is to fuse the span of the term being built and the one
// of the current argument, which should be a reasonable approximation (if
// not exactly the same thing).
term::RichTerm::new(term, arg.pos.fuse(body_pos))
})
.term
.into_owned()
}
Node::Let {
bindings,
body,
Expand Down Expand Up @@ -1274,22 +1324,22 @@ impl<'ast> FromAst<Node<'ast>> for term::Term {
Term::LetPattern(bindings, body, attrs)
}
}
Node::App { head: fun, args } => {
let fun_pos = fun.pos;

let rterm = args.iter().fold(fun.to_mainline(), |result, arg| {
// This case is a bit annoying: we need to extract the position of the sub
// application to satisfy the old AST structure, but this information isn't
// available directly.
//
// What we do here is to fuse the span of the term being built and the one of
// the current argument, which should be a reasonable approximation (if not
// exactly the same thing).
let arg_pos = arg.pos;
term::RichTerm::new(Term::App(result, arg.to_mainline()), fun_pos.fuse(arg_pos))
});

rterm.term.into_owned()
Node::App { head, args } => {
let head_pos = head.pos;

// We transform a n-ary application representation to a chain of nested unary
// applications, the latter being the representation used in the mainline AST.
args.iter()
.fold(head.to_mainline(), |result, arg| {
// see [^nary-constructors-unrolling]
let arg_pos = arg.pos;
term::RichTerm::new(
Term::App(result, arg.to_mainline()),
head_pos.fuse(arg_pos),
)
})
.term
.into_owned()
}
Node::Var(loc_ident) => Term::Var(*loc_ident),
Node::EnumVariant { tag, arg } => {
Expand Down
35 changes: 16 additions & 19 deletions core/src/bytecode/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use std::{
ffi::{OsStr, OsString},
fmt::Debug,
rc,
iter, rc,
};

use pattern::Pattern;
Expand Down Expand Up @@ -79,7 +79,7 @@ pub enum Node<'ast> {

/// A function.
Fun {
arg: &'ast Pattern<'ast>,
args: &'ast [Pattern<'ast>],
body: &'ast Ast<'ast>,
},

Expand Down Expand Up @@ -385,8 +385,7 @@ impl AstAlloc {

/// Allocates an array with exactly one element in the arena.
pub fn alloc_singleton<T: Allocable>(&self, value: T) -> &[T] {
self.generic_arena
.alloc_slice_fill_iter(std::iter::once(value))
self.generic_arena.alloc_slice_fill_iter(iter::once(value))
}

/// Allocates a string in the arena.
Expand Down Expand Up @@ -414,24 +413,22 @@ impl AstAlloc {
Node::StringChunks(self.generic_arena.alloc_slice_fill_iter(chunks))
}

pub fn fun<'ast>(&'ast self, pat: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> {
let arg = self.generic_arena.alloc(pat);
let body = self.generic_arena.alloc(body);
Node::Fun { arg, body }
}

pub fn nary_fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast>
pub fn fun<'ast, I>(&'ast self, args: I, body: Ast<'ast>) -> Node<'ast>
where
I: IntoIterator<Item = Pattern<'ast>>,
I::IntoIter: DoubleEndedIterator,
I::IntoIter: ExactSizeIterator,
{
args.into_iter()
.rev()
.fold(body, |body, arg| Ast {
node: self.fun(arg, body),
pos: TermPos::None,
})
.node
Node::Fun {
args: self.generic_arena.alloc_slice_fill_iter(args),
body: self.generic_arena.alloc(body),
}
}

pub fn unary_fun<'ast>(&'ast self, arg: Pattern<'ast>, body: Ast<'ast>) -> Node<'ast> {
Node::Fun {
args: self.generic_arena.alloc_slice_fill_iter(iter::once(arg)),
body: self.generic_arena.alloc(body),
}
}

pub fn let_block<'ast, I>(&'ast self, bindings: I, body: Ast<'ast>, rec: bool) -> Node<'ast>
Expand Down
13 changes: 2 additions & 11 deletions core/src/parser/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -288,17 +288,8 @@ UniTerm: UniTerm<'ast> = {
body,
)?))
},
<l: @L> "fun" <pats: PatternFun+> "=>" <body: Term> <r: @R> => {
let pos = mk_pos(src_id, l, r);

let expr = pats
.into_iter()
.rev()
.fold(body, |built, next_arg|
alloc.fun(next_arg, built).spanned(pos)
);

UniTerm::from(expr)
"fun" <pats: PatternFun+> "=>" <body: Term> => {
UniTerm::from(alloc.fun(pats, body))
},
"if" <cond: Term> "then" <e1: Term> "else" <e2: Term> =>
UniTerm::from(alloc.if_then_else(cond, e1, e2)),
Expand Down
2 changes: 1 addition & 1 deletion core/src/parser/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl EtaExpand for InfixOp {
let fun_args: Vec<_> = vars.iter().map(|arg| pattern::Pattern::any(*arg)).collect();
let args: Vec<_> = vars.into_iter().map(builder::var).collect();

alloc.nary_fun(fun_args, alloc.prim_op(op, args).spanned(pos))
alloc.fun(fun_args, alloc.prim_op(op, args).spanned(pos))
}
}
}
Expand Down

0 comments on commit eaf676c

Please sign in to comment.