From 06c139e0315567cc70697e7a29ce19cc0e7e606d Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 16 Oct 2024 19:06:28 +0200 Subject: [PATCH 1/8] start refactor --- src/callable/primitive/c.rs | 2 +- src/object/list.rs | 4 +- src/object/vector/core.rs | 75 ++--- src/object/vector/reptype.rs | 610 ++++++++++++++++++++++++++++++++--- 4 files changed, 608 insertions(+), 83 deletions(-) diff --git a/src/callable/primitive/c.rs b/src/callable/primitive/c.rs index 91b5732..a85f639 100644 --- a/src/callable/primitive/c.rs +++ b/src/callable/primitive/c.rs @@ -213,7 +213,7 @@ impl Callable for PrimitiveC { }; if let Some(names) = names { - v.set_names(names.into()) + v.set_names(names.into()); } Ok(Obj::Vector(v)) } diff --git a/src/object/list.rs b/src/object/list.rs index dfc3e5d..fa2cad7 100644 --- a/src/object/list.rs +++ b/src/object/list.rs @@ -1,7 +1,7 @@ -use crate::object::rep::Rep; +use crate::object::reptype::RepType; use crate::object::Obj; -pub type List = Rep; +pub type List = RepType; #[cfg(test)] mod tests { diff --git a/src/object/vector/core.rs b/src/object/vector/core.rs index 1966e45..0a1f84a 100644 --- a/src/object/vector/core.rs +++ b/src/object/vector/core.rs @@ -54,10 +54,10 @@ impl OptionNA { #[derive(Debug, PartialEq)] pub enum Vector { - Double(Rep), - Integer(Rep), - Logical(Rep), - Character(Rep), + Double(RepType), + Integer(RepType), + Logical(RepType), + Character(RepType), // Complex(Complex), // Raw(Raw), } @@ -96,16 +96,16 @@ impl Vector { match self { Double(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Double(Rep::from(vec![x]))), + .map(|x| Double(RepType::from(vec![x]))), Integer(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Integer(Rep::from(vec![x]))), + .map(|x| Integer(RepType::from(vec![x]))), Character(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Character(Rep::from(vec![x]))), + .map(|x| Character(RepType::from(vec![x]))), Logical(x) => x .set_subset(subset, value.try_into()?) 
- .map(|x| Logical(Rep::from(vec![x]))), + .map(|x| Logical(RepType::from(vec![x]))), } } @@ -139,13 +139,14 @@ impl Vector { } } - pub fn set_names(&self, names: CowObj>) { + pub fn set_names(&self, names: CowObj>) -> Self { + use super::Vector::*; match self { - Vector::Character(x) => x.set_names(names), - Vector::Logical(x) => x.set_names(names), - Vector::Integer(x) => x.set_names(names), - Vector::Double(x) => x.set_names(names), - }; + Character(x) => Character(x.set_names(names)), + Logical(x) => Logical(x.set_names(names)), + Integer(x) => Integer(x.set_names(names)), + Double(x) => Double(x.set_names(names)), + } } pub fn try_get(&self, index: Obj) -> EvalResult { @@ -347,29 +348,29 @@ impl From> for Vector { } } -impl From> for Vector { - fn from(x: Rep) -> Self { - Vector::Double(x) - } -} - -impl From> for Vector { - fn from(x: Rep) -> Self { - Vector::Integer(x) - } -} - -impl From> for Vector { - fn from(x: Rep) -> Self { - Vector::Logical(x) - } -} - -impl From> for Vector { - fn from(x: Rep) -> Self { - Vector::Character(x) - } -} +// impl From> for Vector { +// fn from(x: Rep) -> Self { +// Vector::Double(x) +// } +// } + +// impl From> for Vector { +// fn from(x: Rep) -> Self { +// Vector::Integer(x) +// } +// } + +// impl From> for Vector { +// fn from(x: Rep) -> Self { +// Vector::Logical(x) +// } +// } + +// impl From> for Vector { +// fn from(x: Rep) -> Self { +// Vector::Character(x) +// } +// } impl From> for Vector { fn from(x: Vec) -> Self { diff --git a/src/object/vector/reptype.rs b/src/object/vector/reptype.rs index 1b9f2f2..2371a57 100644 --- a/src/object/vector/reptype.rs +++ b/src/object/vector/reptype.rs @@ -1,10 +1,12 @@ use std::fmt::Debug; +use std::fmt::Display; +use std::iter::repeat; -use super::coercion::{AtomicMode, CoercibleInto}; +use super::coercion::{AtomicMode, CoercibleInto, CommonCmp, CommonNum, MinimallyNumeric}; use super::subset::Subset; use super::subsets::Subsets; use super::types::*; -use super::OptionNA; +use 
super::{OptionNA, Pow, VecPartialCmp}; use crate::error::Error; use crate::lang::Signal; use crate::object::{CowObj, ViewMut}; @@ -115,6 +117,12 @@ impl Default for RepType { } impl RepType { + /// Get a cloned version of the inner value. + /// This is used for accessing inner values like `list(1)[[1]]`. + pub fn try_get_inner(&self, subset: Subset) -> Result { + #[allow(clippy::map_clone)] + self.try_get_inner_mut(subset).map(|x| x.clone()) + } /// Retrieve the internal data as a mutable view. /// This is important for lists for things like `l$a[1:2] = c(10, 11)` pub fn try_get_inner_mut(&self, subset: Subset) -> Result { @@ -329,6 +337,39 @@ impl RepType { ) } + /// Whether the vector representation has names. + pub fn is_named(&self) -> bool { + matches!(self, RepType::Subset(.., Some(_))) + } + + /// Return the names of the vector if there are any. + pub fn names(&self) -> Option>> { + match self.clone() { + RepType::Subset(_, s, n) => { + if s.is_empty() { + n.map(|n| n.clone().names) + } else if n.is_some() { + Some( + self.iter_names() + .expect("checked that names exist") + .collect::>() + .into(), + ) + } else { + None + } + } + } + } + + // fn materialize_inplace(&self) -> &Self { + // // TODO: Rewrite this to avoid copying unnecessarily + // let new_repr = { self.borrow().materialize() }; + // self.0.replace(new_repr); + + // self + // } + pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { match &self { RepType::Subset(..) => { @@ -562,7 +603,7 @@ impl RepType { match self { RepType::Subset(v, Subsets(s), _) => match s.as_slice() { [] => v.borrow().len(), - _ => unimplemented!(), + _ => self.values_ref().iter().count(), // _ => self.materialize_inplace().len(), }, } } @@ -656,6 +697,23 @@ impl RepType { } } + // implement materialize_inplace + fn materialize_inplace(&mut self) { + *self = self.materialize(); + } + + /// Return the only value if the vector has length 1. 
+ pub fn as_scalar(&self) -> Option { + let mut into_iter = self.values_ref(); + let mut iter = into_iter.iter(); + if let Some(x) = iter.next() { + if iter.next().is_none() { + return Some(x.clone()); + } + }; + None + } + /// Materialize a Vector /// /// Apply subsets and clone values into a new vector. @@ -924,10 +982,476 @@ where } } +impl Display for RepType +where + T: AtomicMode + Debug + Default + Clone, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let n = self.len(); + if n == 0 { + if self.is_double() { + return write!(f, "double(0)"); + } + if self.is_integer() { + return write!(f, "integer(0)"); + } + if self.is_logical() { + return write!(f, "logical(0)"); + } + if self.is_character() { + return write!(f, "character(0)"); + } + } + let nlen = format!("{}", n).len(); + // calculate how many characters are printed per value. + // The iteraror yields the characters needed for a specific item. + fn element_width(iter: impl Iterator) -> usize { + let mut elt_width = 1_usize; + for (i, width) in iter.enumerate() { + elt_width = std::cmp::max(elt_width, width); + if elt_width * (i + 1) >= 20 * 80 { + break; + } + } + elt_width + } + + if !self.is_named() { + let elt_width = + element_width(self.values_ref().iter().map(|x| format!("{:?}", x).len())); + + let mut values_ref = self.values_ref(); + let x_strs = values_ref.iter().map(|xi| format!("{:?}", xi)); + + let mut col = 0; + let gutterlen = 2 + nlen + 1; + + // hard coded max print & console width + // we print at most 20 rows + let maxprint = 20 * ((80 - gutterlen) / (elt_width + 1)); + + x_strs + .take(maxprint) + .enumerate() + .try_for_each(|(i, x_str)| { + if i == 0 { + col = gutterlen + elt_width; + write!( + f, + "{:>3$}[{}] {:>4$}", + "", + i + 1, + x_str, + nlen - 1, + elt_width + ) + } else if col + 1 + elt_width > 80 { + col = gutterlen + elt_width; + let i_str = format!("{}", i + 1); + let gutter = nlen - i_str.len(); + write!( + f, + "\n{:>3$}[{}] {:>4$}", + "", 
i_str, x_str, gutter, elt_width + ) + } else { + col += 1 + elt_width; + write!(f, " {:>1$}", x_str, elt_width) + } + })?; + + if n > maxprint { + write!(f, "\n[ omitting {} entries ]", n - maxprint)?; + } + } else { + let elt_width = element_width( + self.pairs_ref() + .iter() + .map(|x| std::cmp::max(format!("{:}", x.0).len(), format!("{:?}", x.1).len())), + ); + let mut values_ref = self.values_ref(); + let mut names_ref = self + .names_ref() + .expect("already checked existence of names"); + + let mut values_strs = values_ref.iter().map(|x| format!("{:?}", x)); + let mut names_strs = names_ref.iter().map(|x| format!("{:}", x)); + + // hard coded max print & console width + // we print at most 20 rows + let elts_per_line = 80 / (elt_width + 1); + + 'lines: for _ in 1..=20 { + for _ in 1..=elts_per_line { + if let Some(name) = names_strs.next() { + write!(f, "{:}{:>2$}", name, " ", elt_width - name.len())?; + } else { + break; + } + } + writeln!(f)?; + for _ in 1..=elts_per_line { + if let Some(value) = values_strs.next() { + write!(f, "{:}{:>2$}", value, " ", elt_width - value.len())?; + } else { + break 'lines; + } + } + writeln!(f)?; + } + } + Ok(()) + } +} + +impl std::ops::Neg for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + LNum: std::ops::Neg, + RepType: From>, + O: Clone, +{ + type Output = Result, Signal>; + fn neg(self) -> Self::Output { + let result: Vec = self + .iter_values() + .map(|x| -(CoercibleInto::::coerce_into(x))) + .collect(); + Ok(result.into()) + } +} + +impl std::ops::Add> for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + C: Clone + std::ops::Add + Default, + RepType: From>, + O: Clone + Default, +{ + type Output = Result, Signal>; + fn add(self, rhs: RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| x + y) + } +} + +impl std::ops::Sub> for 
RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + C: Clone + std::ops::Sub + Default, + RepType: From>, + O: Clone + Default, +{ + type Output = Result, Signal>; + fn sub(self, rhs: RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| x - y) + } +} + +impl std::ops::Mul> for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + C: Clone + std::ops::Mul + Default, + RepType: From>, + O: Clone + Default, +{ + type Output = Result, Signal>; + fn mul(self, rhs: RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| x * y) + } +} + +impl std::ops::Div> for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + C: Clone + std::ops::Div + Default, + RepType: From>, + O: Clone + Default, +{ + type Output = Result, Signal>; + fn div(self, rhs: RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| x / y) + } +} + +impl std::ops::Rem> for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + C: Clone + std::ops::Rem + Default, + RepType: From>, + O: Clone + Default, +{ + type Output = Result, Signal>; + fn rem(self, rhs: RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| x % y) + } +} + +impl Pow> for RepType +where + L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, + (LNum, RNum): CommonNum, + O: Pow, + RepType: From>, + O: Default, + L: Clone, + R: Clone, + O: Clone, +{ + type Output = Result, Signal>; + fn power(self, rhs: 
RepType) -> Self::Output { + try_binary_num_op(self, rhs, |x, y| Pow::power(x, y)) + } +} + +impl std::ops::BitOr> for RepType +where + L: AtomicMode + Default + Clone + CoercibleInto, + R: AtomicMode + Default + Clone + CoercibleInto, +{ + type Output = Result, Signal>; + fn bitor(self, rhs: RepType) -> Self::Output { + try_binary_lgl_op(self, rhs, |x, y| x | y) + } +} + +impl std::ops::BitAnd> for RepType +where + L: AtomicMode + Default + Clone + CoercibleInto, + R: AtomicMode + Default + Clone + CoercibleInto, +{ + type Output = Result, Signal>; + fn bitand(self, rhs: RepType) -> Self::Output { + try_binary_lgl_op(self, rhs, |x, y| x & y) + } +} + +impl std::ops::Not for RepType +where + L: AtomicMode + Default + Clone + CoercibleInto, +{ + type Output = Result, Signal>; + fn not(self) -> Self::Output { + let result: Vec = self + .iter_values() + .map(|x| !(CoercibleInto::::coerce_into(x))) + .collect(); + Ok(result.into()) + } +} + +impl VecPartialCmp> for RepType +where + L: AtomicMode + Default + Clone + CoercibleInto + Clone, + R: AtomicMode + Default + Clone + CoercibleInto + Clone, + (L, R): CommonCmp, + C: PartialOrd + Clone + Default, +{ + type Output = Result, Signal>; + + fn vec_gt(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + try_binary_cmp_op(self, rhs, |i| match i { + Some(Greater) => OptionNA::Some(true), + Some(_) => OptionNA::Some(false), + None => OptionNA::NA, + }) + } + + fn vec_gte(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + try_binary_cmp_op(self, rhs, |i| match i { + Some(Greater | Equal) => OptionNA::Some(true), + Some(_) => OptionNA::Some(false), + None => OptionNA::NA, + }) + } + + fn vec_lt(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + try_binary_cmp_op(self, rhs, |i| match i { + Some(Less) => OptionNA::Some(true), + Some(_) => OptionNA::Some(false), + None => OptionNA::NA, + }) + } + + fn vec_lte(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + 
try_binary_cmp_op(self, rhs, |i| match i { + Some(Less | Equal) => OptionNA::Some(true), + Some(_) => OptionNA::Some(false), + None => OptionNA::NA, + }) + } + + fn vec_eq(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + try_binary_cmp_op(self, rhs, |i| match i { + Some(Equal) => OptionNA::Some(true), + Some(_) => OptionNA::Some(false), + None => OptionNA::NA, + }) + } + + fn vec_neq(self, rhs: RepType) -> Self::Output { + use std::cmp::Ordering::*; + try_binary_cmp_op(self, rhs, |i| match i { + Some(Equal) => OptionNA::Some(false), + Some(_) => OptionNA::Some(true), + None => OptionNA::NA, + }) + } +} + +/// This function applies a function `g` to pairs from lhs and rhs. +/// The function returns an error when the lengths are not compatible. +fn try_recycle_then( + lhs: RepType, + rhs: RepType, + g: F, +) -> Result, Signal> +where + L: Clone + Default, + R: Clone + Default, + RepType: From>, + O: Clone + Default, + A: Clone, + F: Fn(L, R) -> O, +{ + match (lhs.as_scalar(), rhs.as_scalar()) { + (Some(l), Some(r)) => { + let result: Vec = vec![g(l, r)]; + Ok(RepType::from(result)) + } + (Some(l), None) => { + let result: Vec = repeat(l) + .zip(rhs.iter_values()) + .map(|(l, r)| g(l, r)) + .collect(); + if result.is_empty() { + return Err(Signal::Error(Error::NonRecyclableLengths(1, 0))); + } + Ok(RepType::from(result)) + } + (None, Some(r)) => { + let result: Vec = lhs + .iter_values() + .zip(repeat(r)) + .map(|(l, r)| g(l, r)) + .collect(); + if result.is_empty() { + return Err(Signal::Error(Error::NonRecyclableLengths(0, 1))); + } + Ok(RepType::from(result)) + } + (None, None) => { + let mut lc = lhs.iter_values(); + let mut rc = rhs.iter_values(); + + let max_size = std::cmp::max(lc.size_hint().0, rc.size_hint().0); + + let mut result: Vec = Vec::with_capacity(max_size); + + loop { + match (lc.next(), rc.next()) { + (Some(l), Some(r)) => result.push(g(l, r)), + (Some(_), None) => { + return Err(Signal::Error(Error::NonRecyclableLengths( + 
result.len() + 1 + lc.count(), + result.len(), + ))); + } + (None, Some(_)) => { + return Err(Signal::Error(Error::NonRecyclableLengths( + result.len(), + result.len() + 1 + rc.count(), + ))); + } + (None, None) => return Ok(RepType::from(result)), + } + } + } + } +} + +fn try_binary_num_op( + lhs: RepType, + rhs: RepType, + f: F, +) -> Result, Signal> +where + L: Default + Clone + MinimallyNumeric + CoercibleInto, + R: Default + Clone + MinimallyNumeric + CoercibleInto, + C: Default + Clone, + (LNum, RNum): CommonNum, + RepType: From>, + O: Clone + Default, + F: Fn(C, C) -> O, + C: Clone + Default, +{ + try_recycle_then(lhs, rhs, |x, y| { + let (c1, c2) = ( + CoercibleInto::::coerce_into(x), + CoercibleInto::::coerce_into(y), + ) + .into_common(); + f(c1, c2) + }) +} + +// FIXME(performance): equality with references for characters +fn try_binary_cmp_op( + lhs: RepType, + rhs: RepType, + f: F, +) -> Result, Signal> +where + L: AtomicMode + Default + Clone + CoercibleInto + Clone, + R: AtomicMode + Default + Clone + CoercibleInto + Clone, + (L, R): CommonCmp, + C: PartialOrd + Clone + Default, + F: Fn(Option) -> Logical, +{ + try_recycle_then(lhs, rhs, |x, y| { + let c1: C = x.coerce_into(); + let c2: C = y.coerce_into(); + let ordering = c1.partial_cmp(&c2); + f(ordering) + }) +} + +pub fn try_binary_lgl_op( + lhs: RepType, + rhs: RepType, + f: F, +) -> Result, Signal> +where + L: AtomicMode + Default + Clone + CoercibleInto, + R: AtomicMode + Default + Clone + CoercibleInto, + F: Fn(Logical, Logical) -> Logical, +{ + try_recycle_then(lhs, rhs, |x, y| { + let (c1, c2) = ( + CoercibleInto::::coerce_into(x), + CoercibleInto::::coerce_into(y), + ); + f(c1, c2) + }) +} #[cfg(test)] mod test { use super::OptionNA::*; - use crate::object::rep::Rep; use crate::object::reptype::RepType; use crate::object::{types::*, OptionNA, VecPartialCmp}; use crate::r; @@ -935,11 +1459,11 @@ mod test { #[test] fn vector_add() { - let x = Rep::::from((1..=5).collect::>()); - let y = 
Rep::::from(vec![2, 5, 6, 2, 3]); + let x = RepType::::from((1..=5).collect::>()); + let y = RepType::::from(vec![2, 5, 6, 2, 3]); let z = (x + y).unwrap(); - assert_eq!(z, Rep::from(vec![3, 7, 9, 6, 8])); + assert_eq!(z, RepType::from(vec![3, 7, 9, 6, 8])); let expected_type = RepType::::new(); assert!(z.is_same_type_as(&expected_type)); @@ -948,11 +1472,11 @@ mod test { #[test] fn vector_mul() { - let x = Rep::::from((1..=5).collect::>()); - let y = Rep::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); + let x = RepType::::from((1..=5).collect::>()); + let y = RepType::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); let z = (x * y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(2), NA, Some(18), NA, Some(15),])); + assert_eq!(z, RepType::from(vec![Some(2), NA, Some(18), NA, Some(15),])); let expected_type = RepType::::new(); assert!(z.is_same_type_as(&expected_type)); @@ -964,8 +1488,8 @@ mod test { // expect that f32's do not get coerced into an OptionNA:: instead // using std::f32::NAN as NA representation. - let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = Rep::::from(vec![100, 10, 1]); + let x = RepType::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = RepType::::from(vec![100, 10, 1]); let z = (x * y).unwrap(); // assert_eq!(z, Vector::from(vec![0_f32, std::f32::NAN, 1_000_f32])); @@ -981,11 +1505,11 @@ mod test { // expect that f32's do not get coerced into an OptionNA:: instead // using std::f32::NAN as NA representation. 
- let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = Rep::::from(vec![100, 10, 1]); + let x = RepType::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = RepType::::from(vec![100, 10, 1]); let z = (x & y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + assert_eq!(z, RepType::from(vec![Some(false), NA, Some(true)])); let expected_type = RepType::::new(); assert!(z.is_same_type_as(&expected_type)); @@ -997,11 +1521,11 @@ mod test { // expect that f32's do not get coerced into an instead // using std::f32::NAN as NA representation. - let x = Rep::from(vec![Some(0_f64), NA, Some(10000_f64)]); - let y = Rep::::from(vec![100, 10, 1]); + let x = RepType::from(vec![Some(0_f64), NA, Some(10000_f64)]); + let y = RepType::::from(vec![100, 10, 1]); let z = x.vec_gt(y).unwrap(); - assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + assert_eq!(z, RepType::from(vec![Some(false), NA, Some(true)])); let expected_type = RepType::::new(); assert!(z.is_same_type_as(&expected_type)); @@ -1069,7 +1593,7 @@ mod test { let x = r!(c(a = 1, 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_pairs() + r.iter_pairs() } else { unreachable!() }; @@ -1087,7 +1611,7 @@ mod test { let x = r!(c(1, 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_pairs() + r.iter_pairs() } else { unreachable!() }; @@ -1102,7 +1626,7 @@ mod test { let x = r!(c(1, 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_values() + r.iter_values() } else { unreachable!() }; @@ -1117,7 +1641,7 @@ mod test { let x = r!(c(1, 2)).unwrap(); let x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_names() + r.iter_names() } else { unreachable!() }; @@ -1130,7 +1654,7 @@ mod test { let x = r!(c(1, b = 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().iter_names().unwrap() + 
r.iter_names().unwrap() } else { unreachable!() }; @@ -1145,7 +1669,7 @@ mod test { let x = r!(c(1, b = 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().names_ref().unwrap() + r.names_ref().unwrap() } else { unreachable!() }; @@ -1163,7 +1687,7 @@ mod test { let x = r!(c(1, 2)).unwrap(); if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().names_ref().unwrap() + r.names_ref().unwrap() } else { unreachable!() }; @@ -1174,7 +1698,7 @@ mod test { let x = r!(c(1, b = 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().values_ref() + r.values_ref() } else { unreachable!() }; @@ -1191,7 +1715,7 @@ mod test { let x = r!(c(1, b = 2)).unwrap(); let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.borrow().clone().pairs_ref() + r.pairs_ref() } else { unreachable!() }; @@ -1211,8 +1735,8 @@ mod test { #[test] fn assign_recycle_incompatible() { - let mut x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99, 99]); + let mut x = RepType::::from(vec![1, 2, 3]); + let y = RepType::::from(vec![99, 99]); let result = x.assign(y); assert_eq!( result.unwrap_err(), @@ -1221,8 +1745,8 @@ mod test { } #[test] fn assign_recycle_length_one() { - let x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99]); + let x = RepType::::from(vec![1, 2, 3]); + let y = RepType::::from(vec![99]); let mut xview = x.subset(vec![0, 1].into()); let _ = xview.assign(y).unwrap(); let result_vec: Vec<_> = x.iter_values().collect(); @@ -1230,8 +1754,8 @@ mod test { } #[test] fn non_recyclable_lengths_3_2() { - let x = Rep::::from(vec![1, 2, 3]); - let y = Rep::::from(vec![99, 99]); + let x = RepType::::from(vec![1, 2, 3]); + let y = RepType::::from(vec![99, 99]); let result = x + y; assert_eq!( result.unwrap_err(), @@ -1240,8 +1764,8 @@ mod test { } #[test] fn non_recyclable_lengths_4_2() { - let x = Rep::::from(vec![1, 2, 3, 4]); - let y = Rep::::from(vec![99, 99]); + let x = 
RepType::::from(vec![1, 2, 3, 4]); + let y = RepType::::from(vec![99, 99]); let result = x + y; assert_eq!( result.unwrap_err(), @@ -1250,8 +1774,8 @@ mod test { } #[test] fn non_recyclable_lengths_2_3() { - let x = Rep::::from(vec![1, 2]); - let y = Rep::::from(vec![99, 99, 99]); + let x = RepType::::from(vec![1, 2]); + let y = RepType::::from(vec![99, 99, 99]); let result = x + y; assert_eq!( result.unwrap_err(), @@ -1260,8 +1784,8 @@ mod test { } #[test] fn non_recyclable_lengths_2_4() { - let x = Rep::::from(vec![1, 2]); - let y = Rep::::from(vec![99, 99, 99, 99]); + let x = RepType::::from(vec![1, 2]); + let y = RepType::::from(vec![99, 99, 99, 99]); let result = x + y; assert_eq!( result.unwrap_err(), @@ -1270,8 +1794,8 @@ mod test { } #[test] fn non_recyclable_lengths_0_1() { - let x = Rep::::from(Vec::::new()); - let y = Rep::::from(vec![99]); + let x = RepType::::from(Vec::::new()); + let y = RepType::::from(vec![99]); let result = x + y; assert_eq!( result.unwrap_err(), @@ -1280,8 +1804,8 @@ mod test { } #[test] fn non_recyclable_lengths_1_0() { - let x = Rep::::from(vec![99]); - let y = Rep::::from(Vec::::new()); + let x = RepType::::from(vec![99]); + let y = RepType::::from(Vec::::new()); let result = x + y; assert_eq!( result.unwrap_err(), From fd12d9e86efa830ad1a337882b2606bedbbb8d10 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Wed, 16 Oct 2024 19:32:54 +0200 Subject: [PATCH 2/8] ... 
--- src/callable/primitive/c.rs | 6 ++++-- src/object/vector/reptype.rs | 11 ++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/callable/primitive/c.rs b/src/callable/primitive/c.rs index a85f639..5f339ce 100644 --- a/src/callable/primitive/c.rs +++ b/src/callable/primitive/c.rs @@ -213,9 +213,11 @@ impl Callable for PrimitiveC { }; if let Some(names) = names { - v.set_names(names.into()); + println!("setting names"); + Ok(Obj::Vector(v.set_names(names.into()))) + } else { + Ok(Obj::Vector(v)) } - Ok(Obj::Vector(v)) } } diff --git a/src/object/vector/reptype.rs b/src/object/vector/reptype.rs index 2371a57..4892901 100644 --- a/src/object/vector/reptype.rs +++ b/src/object/vector/reptype.rs @@ -399,6 +399,10 @@ impl RepType { pub fn values_ref(&self) -> IntoIterableRefValues { match self.clone() { RepType::Subset(values, ..) => { + let iter = Box::new(self.iter_subset_indices()); + for x in iter { + dbg!(&x); + } let iter = Box::new(self.iter_subset_indices()); let values = values.inner_rc(); @@ -603,7 +607,12 @@ impl RepType { match self { RepType::Subset(v, Subsets(s), _) => match s.as_slice() { [] => v.borrow().len(), - _ => self.values_ref().iter().count(), // _ => self.materialize_inplace().len(), + _ => { + dbg!(&s[0]); + self.values_ref().iter().count(); // _ => self.materialize_inplace().len(), + println!("Wupsie"); + todo!() + } }, } } From ba646acb3dc1f236bdc5a628ff5c25ea40d5f7b5 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 07:58:18 +0200 Subject: [PATCH 3/8] ... --- src/object/vector/reptype.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/object/vector/reptype.rs b/src/object/vector/reptype.rs index 4892901..83f841a 100644 --- a/src/object/vector/reptype.rs +++ b/src/object/vector/reptype.rs @@ -400,9 +400,9 @@ impl RepType { match self.clone() { RepType::Subset(values, ..) 
=> { let iter = Box::new(self.iter_subset_indices()); - for x in iter { - dbg!(&x); - } + // for x in iter { + // dbg!(&x); + // } let iter = Box::new(self.iter_subset_indices()); let values = values.inner_rc(); @@ -607,12 +607,7 @@ impl RepType { match self { RepType::Subset(v, Subsets(s), _) => match s.as_slice() { [] => v.borrow().len(), - _ => { - dbg!(&s[0]); - self.values_ref().iter().count(); // _ => self.materialize_inplace().len(), - println!("Wupsie"); - todo!() - } + _ => self.values_ref().iter().count(), }, } } From e0287cb0b938231882b07d7592d3d9657bdf47bd Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 08:26:57 +0200 Subject: [PATCH 4/8] ... --- src/CHANGELOG.md | 3 +- src/callable/core.rs | 2 +- src/callable/primitive/c.rs | 1 - src/callable/primitive/sum.rs | 28 +- src/object/list.rs | 4 +- src/object/vector/core.rs | 59 +- src/object/vector/mod.rs | 1 - src/object/vector/rep.rs | 1381 ++++++++++++++++++++----- src/object/vector/reptype.rs | 1819 --------------------------------- src/object/vector/subset.rs | 19 +- 10 files changed, 1177 insertions(+), 2140 deletions(-) delete mode 100644 src/object/vector/reptype.rs diff --git a/src/CHANGELOG.md b/src/CHANGELOG.md index d17608e..f7e1151 100644 --- a/src/CHANGELOG.md +++ b/src/CHANGELOG.md @@ -5,7 +5,7 @@ * Named vectors were added and can e.g. be constructed via `[a = 1, b = 2]` * The `is_null()` primitive was added * Setting a list value to `null` actually sets it to `null` and does not remove it. -* Stricter recycling rule are enforced (@98): +* Stricter recycling rules are enforced (@98): Vectorized operations on two vectors `v1` and `v2` now requires either of: * One of the vectors has length 1 and the other vector's length is not zero. * The vectors have the same length. @@ -20,6 +20,7 @@ This included a considerable refactor. * Iterating over references of a `Rep` was made much simpler and new methods were added and unused ones removed. 
+* The `RepType` struct that was introduced in 0.4.0 was removed again. ## Notable Bugs Addressed diff --git a/src/callable/core.rs b/src/callable/core.rs index 43334a4..457c34f 100644 --- a/src/callable/core.rs +++ b/src/callable/core.rs @@ -47,7 +47,7 @@ pub trait Callable: CallableFormals { for (i, (maybe_name, value)) in args.pairs_ref().iter().enumerate() { if let Character::Some(name) = maybe_name { - if let Some((Some(_), _)) = formals.remove_named(name) { + if let Some((Some(_), _)) = formals.remove_named(&name) { matched_args.push_named(Character::Some(name.clone()), value.clone()); continue; } diff --git a/src/callable/primitive/c.rs b/src/callable/primitive/c.rs index 5f339ce..920a3f0 100644 --- a/src/callable/primitive/c.rs +++ b/src/callable/primitive/c.rs @@ -213,7 +213,6 @@ impl Callable for PrimitiveC { }; if let Some(names) = names { - println!("setting names"); Ok(Obj::Vector(v.set_names(names.into()))) } else { Ok(Obj::Vector(v)) diff --git a/src/callable/primitive/sum.rs b/src/callable/primitive/sum.rs index 52470cb..11a8ab6 100644 --- a/src/callable/primitive/sum.rs +++ b/src/callable/primitive/sum.rs @@ -5,7 +5,7 @@ use crate::error::*; use crate::formals; use crate::internal_err; use crate::lang::*; -use crate::object::reptype::RepType; +use crate::object::rep::Rep; use crate::object::*; /// Calculate a Sum of Elements @@ -40,7 +40,7 @@ impl Callable for PrimitiveSum { let (_, ellipsis) = self.match_arg_exprs(args, stack)?; if ellipsis.is_empty() { - return EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![0.0])))); + return EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![0.0])))); } let objects: Vec = force_promises(ellipsis, stack)? 
@@ -78,8 +78,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as i32 as f64, @@ -90,8 +90,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as f64, @@ -102,8 +102,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x, @@ -116,7 +116,7 @@ impl Callable for PrimitiveSum { _ => return internal_err!(), } } - EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![sum])))) + EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![sum])))) } else { let mut sum: i32 = 0; @@ -128,8 +128,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x as i32, @@ -140,8 +140,8 @@ impl Callable for PrimitiveSum { for x in repr.inner().borrow().iter() { match *x { OptionNA::NA => { - let rep: RepType> = - RepType::from(vec![OptionNA::NA]); + let rep: Rep> = + Rep::from(vec![OptionNA::NA]); return EvalResult::Ok(Obj::Vector(Vector::from(rep))); } OptionNA::Some(x) => sum += x, @@ -154,7 +154,7 @@ impl Callable for PrimitiveSum { _ => return internal_err!(), } } - 
EvalResult::Ok(Obj::Vector(Vector::from(RepType::from(vec![sum])))) + EvalResult::Ok(Obj::Vector(Vector::from(Rep::from(vec![sum])))) } } } diff --git a/src/object/list.rs b/src/object/list.rs index fa2cad7..dfc3e5d 100644 --- a/src/object/list.rs +++ b/src/object/list.rs @@ -1,7 +1,7 @@ -use crate::object::reptype::RepType; +use crate::object::rep::Rep; use crate::object::Obj; -pub type List = RepType; +pub type List = Rep; #[cfg(test)] mod tests { diff --git a/src/object/vector/core.rs b/src/object/vector/core.rs index 0a1f84a..b819163 100644 --- a/src/object/vector/core.rs +++ b/src/object/vector/core.rs @@ -8,9 +8,8 @@ use crate::object::CowObj; use crate::object::Obj; use super::coercion::CoercibleInto; +use super::rep::IterableValues; use super::rep::Rep; -use super::reptype::IterableValues; -use super::reptype::RepType; use super::subset::Subset; use super::types::*; @@ -54,10 +53,10 @@ impl OptionNA { #[derive(Debug, PartialEq)] pub enum Vector { - Double(RepType), - Integer(RepType), - Logical(RepType), - Character(RepType), + Double(Rep), + Integer(Rep), + Logical(Rep), + Character(Rep), // Complex(Complex), // Raw(Raw), } @@ -96,16 +95,16 @@ impl Vector { match self { Double(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Double(RepType::from(vec![x]))), + .map(|x| Double(Rep::from(vec![x]))), Integer(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Integer(RepType::from(vec![x]))), + .map(|x| Integer(Rep::from(vec![x]))), Character(x) => x .set_subset(subset, value.try_into()?) - .map(|x| Character(RepType::from(vec![x]))), + .map(|x| Character(Rep::from(vec![x]))), Logical(x) => x .set_subset(subset, value.try_into()?) 
- .map(|x| Logical(RepType::from(vec![x]))), + .map(|x| Logical(Rep::from(vec![x]))), } } @@ -324,54 +323,30 @@ impl From>> for Vector { } } -impl From> for Vector { - fn from(x: RepType) -> Self { +impl From> for Vector { + fn from(x: Rep) -> Self { Vector::Double(x.into()) } } -impl From> for Vector { - fn from(x: RepType) -> Self { +impl From> for Vector { + fn from(x: Rep) -> Self { Vector::Integer(x.into()) } } -impl From> for Vector { - fn from(x: RepType) -> Self { +impl From> for Vector { + fn from(x: Rep) -> Self { Vector::Logical(x.into()) } } -impl From> for Vector { - fn from(x: RepType) -> Self { +impl From> for Vector { + fn from(x: Rep) -> Self { Vector::Character(x.into()) } } -// impl From> for Vector { -// fn from(x: Rep) -> Self { -// Vector::Double(x) -// } -// } - -// impl From> for Vector { -// fn from(x: Rep) -> Self { -// Vector::Integer(x) -// } -// } - -// impl From> for Vector { -// fn from(x: Rep) -> Self { -// Vector::Logical(x) -// } -// } - -// impl From> for Vector { -// fn from(x: Rep) -> Self { -// Vector::Character(x) -// } -// } - impl From> for Vector { fn from(x: Vec) -> Self { Vector::Double(x.into()) diff --git a/src/object/vector/mod.rs b/src/object/vector/mod.rs index 630c962..1db5ee6 100644 --- a/src/object/vector/mod.rs +++ b/src/object/vector/mod.rs @@ -7,7 +7,6 @@ pub mod coercion; pub mod iterators; pub mod rep; -pub mod reptype; pub mod types; mod subsets; diff --git a/src/object/vector/rep.rs b/src/object/vector/rep.rs index f790dc7..d9753cb 100644 --- a/src/object/vector/rep.rs +++ b/src/object/vector/rep.rs @@ -1,128 +1,351 @@ -use std::cell::{Ref, RefCell, RefMut}; -use std::fmt::{Debug, Display}; +use std::fmt::Debug; +use std::fmt::Display; use std::iter::repeat; use super::coercion::{AtomicMode, CoercibleInto, CommonCmp, CommonNum, MinimallyNumeric}; -use super::reptype::{ - IntoIterableRefNames, IntoIterableRefPairs, IntoIterableRefValues, IterablePairs, - IterableValues, Naming, RepType, -}; use 
super::subset::Subset; +use super::subsets::Subsets; use super::types::*; use super::{OptionNA, Pow, VecPartialCmp}; use crate::error::Error; use crate::lang::Signal; -use crate::object::{CowObj, Obj, Subsets, ViewMut}; +use crate::object::{CowObj, ViewMut}; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::rc::Rc; + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Naming { + // TODO: change this to usize and not Vec (after making names unique) + pub map: CowObj>>, + pub names: CowObj>>, +} -/// Vector Representation -/// -/// The ref-cell is used so vectors can change there internal representation, -/// e.g. by materializing. -#[derive(Debug, PartialEq)] -pub struct Rep(pub RefCell>); +impl Naming { + /// Create an empty `Naming` + pub fn new() -> Self { + Naming::default() + } -impl Clone for Rep { - fn clone(&self) -> Self { - match self.borrow().clone() { - RepType::Subset(v, s, n) => Rep(RefCell::new(RepType::Subset( - v.clone(), - s.clone(), - n.clone(), - ))), + /// Create a naming with the given `capacity`. + pub fn with_capacity(capacity: usize) -> Self { + Self { + map: HashMap::>::with_capacity(capacity).into(), + names: CowObj::from(Vec::::with_capacity(capacity)), } } + + /// Push a new name onto the `Naming`. + pub fn push_name(&self, name: OptionNA) { + self.names.with_inner_mut(|v| v.push(name.clone())); + if let OptionNA::Some(name) = name { + let n = self.names.len() - 1; + self.map.with_inner_mut(|map| { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&n) { + indices.push(n); + }; + }); + }; + } + + /// Get mutable access to the internal data (map and names vector) via the passed closure. 
+ pub fn with_inner_mut(&self, f: F) -> R + where + F: FnOnce(&mut HashMap>, &mut Vec>) -> R, + { + self.map + .with_inner_mut(|map| self.names.with_inner_mut(|names| f(map, names))) + } } -impl ViewMut for Rep { - fn view_mut(&self) -> Self { - Self(RefCell::new(self.borrow().view_mut())) +impl From> for Rep { + fn from(value: Vec<(Character, T)>) -> Self { + let mut names = Vec::with_capacity(value.len()); + let mut values = Vec::with_capacity(value.len()); + for (k, v) in value { + names.push(k); + values.push(v); + } + + Rep::Subset( + CowObj::new(Rc::new(RefCell::new(Rc::new(values)))), + Subsets::default(), + Option::Some(Naming::from(names)), + ) } } -impl Rep { - /// Get the inner value mutably. - /// This is used for assignments like `list(1)[[1]] = 10`. - pub fn try_get_inner_mut(&self, subset: Subset) -> Result { - self.borrow().try_get_inner_mut(subset) +impl From>> for Naming { + fn from(value: CowObj>) -> Self { + let mut map: HashMap> = HashMap::new(); + + value.iter().enumerate().for_each(|(i, maybe_name)| { + if let OptionNA::Some(name) = maybe_name { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&i) { + indices.push(i); + }; + }; + }); + + Self { map: map.into(), names: value } + } +} + +/// Vector +#[derive(Debug, PartialEq)] +pub enum Rep { + // Vector::Subset encompasses a "raw" vector (no subsetting) + Subset(CowObj>, Subsets, Option), + // Iterator includes things like ranges 1:Inf, and lazily computed values + // Iter(Box>) +} + +impl Clone for Rep { + fn clone(&self) -> Self { + match self { + Rep::Subset(v, s, n) => Rep::Subset(v.clone(), s.clone(), n.clone()), + } } +} +impl Default for Rep { + fn default() -> Self { + Self::new() + } +} + +impl Rep { /// Get a cloned version of the inner value. /// This is used for accessing inner values like `list(1)[[1]]`. 
pub fn try_get_inner(&self, subset: Subset) -> Result { #[allow(clippy::map_clone)] self.try_get_inner_mut(subset).map(|x| x.clone()) } + /// Retrieve the internal data as a mutable view. + /// This is important for lists for things like `l$a[1:2] = c(10, 11)` + pub fn try_get_inner_mut(&self, subset: Subset) -> Result { + let new_subset = self.subset(subset); + match new_subset { + Rep::Subset(..) => { + let mut iter = new_subset.iter_subset_indices(); + + if let Some(i) = iter.next() { + if iter.next().is_some() { + return Error::Other("subset has length > 1".to_string()).into(); + } + + // TODO: subsetting with NA should not be possible. + let i = i.unwrap(); + + Ok(self.with_inner_mut(|values| values[i].view_mut())) + } else { + Error::Other("subset is empty".to_string()).into() + } + } + } + } } -impl Rep { - /// Iterate over the owned names and values of the vector. - pub fn iter_pairs(&self) -> IterablePairs { - self.0.borrow().clone().iter_pairs() +pub struct IntoIterableRefNames { + names: Rc>, + na_name: Character, + iter: Box>>, +} + +pub struct RepIterableNames<'a> { + names: &'a [Character], + na_name: &'a Character, + iter: &'a mut Box>>, +} + +impl IntoIterableRefNames { + pub fn iter(&mut self) -> RepIterableNames<'_> { + let names = &self.names[..]; + RepIterableNames { + names, + na_name: &self.na_name, + iter: &mut self.iter, + } } } -impl Rep -where - T: Clone + Default, -{ - /// Return the only value if the vector has length 1. - pub fn as_scalar(&self) -> Option { - let mut into_iter = self.values_ref(); - let mut iter = into_iter.iter(); - if let Some(x) = iter.next() { - if iter.next().is_none() { - return Some(x.clone()); - } - }; - None +impl<'a> Iterator for RepIterableNames<'a> { + type Item = &'a Character; + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? 
{ + Some(&self.names[i]) + } else { + Some(self.na_name) + } } +} + +pub struct IntoIterableRefValues { + values: Rc>, + na_value: T, + iter: Box>>, +} + +impl IntoIterableRefValues { + pub fn iter(&mut self) -> IterableRefValues<'_, T> { + let values = &self.values[..]; - pub fn borrow(&self) -> Ref> { - self.0.borrow() + IterableRefValues { + values, + na_value: &self.na_value, + iter: &mut self.iter, + } } +} + +pub struct IntoIterableRefPairs { + values: Rc>, + names: Option>>, + na_value: T, + na_name: Character, + iter: Box>>, +} + +impl IntoIterableRefPairs { + pub fn iter(&mut self) -> IterableRefPairs<'_, T> { + let values = &self.values[..]; - pub fn borrow_mut(&mut self) -> RefMut> { - self.0.borrow_mut() + let names = self.names.as_ref().map(|names| &names[..]); + + IterableRefPairs { + values, + names, + na_value: &self.na_value, + na_name: &self.na_name, + iter: &mut self.iter, + } } +} - /// Iterate over the (owned) values of the vector. - pub fn iter_values(&self) -> IterableValues { - self.0.borrow().iter_values() +pub struct IterableRefValues<'a, T: Clone> { + values: &'a [T], + na_value: &'a T, + iter: &'a mut Box>>, +} + +pub struct IterableRefPairs<'a, T: Clone> { + values: &'a [T], + names: Option<&'a [Character]>, + na_value: &'a T, + na_name: &'a Character, + iter: &'a mut Box>>, +} + +impl<'a, T: Clone> Iterator for IterableRefPairs<'a, T> { + type Item = (&'a Character, &'a T); + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? { + if let Some(names) = self.names { + Option::Some((&names[i], &self.values[i])) + } else { + Option::Some((self.na_name, &self.values[i])) + } + } else { + Option::Some((self.na_name, self.na_value)) + } } +} - /// Iterate over the names of the vector (if they exist). - pub fn iter_names(&self) -> Option> { - self.0.borrow().iter_names() +impl<'a, T: Clone> Iterator for IterableRefValues<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + if let Some(i) = self.iter.next()? 
{ + Some(&self.values[i]) + } else { + Some(self.na_value) + } + } +} + +impl ViewMut for Rep { + fn view_mut(&self) -> Self { + match self { + Rep::Subset(v, s, n) => Rep::Subset(v.view_mut(), s.clone(), n.clone()), + } } +} - fn materialize_inplace(&self) -> &Self { - // TODO: Rewrite this to avoid copying unnecessarily - let new_repr = { self.borrow().materialize() }; - self.0.replace(new_repr); +pub struct IterableValues { + values: Rc>, + iter: Box>>, +} - self +impl Iterator for IterableValues { + type Item = T; + fn next(&mut self) -> Option { + // FIXME: Already assumes no indexing with NA + let i = self.iter.next()?.unwrap(); + Some(self.values[i].clone()) } +} - /// Reindex the mapping from names to indices using the names vector from the `Naming`. - pub fn reindex(&mut self) { - self.borrow_mut().reindex() +pub struct IterablePairs { + values: Rc>, + names: Option>>, + iter: Box>>, +} + +impl Iterator for IterablePairs { + type Item = (Character, T); + fn next(&mut self) -> Option { + // FIXME: Already assumes no indexing with NA + let i = self.iter.next()?.unwrap(); + let value = self.values[i].clone(); + let name = if let Some(names) = &self.names { + names[i].clone() + } else { + Character::NA + }; + Some((name, value)) } +} - /// Set the names of the vector. - pub fn set_names(&self, names: CowObj>>) { - let new_repr = self.borrow().materialize().set_names(names); - self.0.replace(new_repr); +impl Rep { + /// Create an empty vector + /// + /// The primary use case for this function is to support testing, and there + /// are few expected use cases outside. It is used for creating a vector + /// of an explicit atomic type, likely to be tested with + /// `SameType::is_same_type_as`. 
+ /// + /// ``` + /// use r::utils::*; + /// use r::object::Vector; + /// use r::object::OptionNA; + /// + /// let result = Vector::from(vec![1, 2, 3]); + /// let expect = Vector::from(Vec::>::new()); + /// + /// assert!(result.is_same_type_as(&expect)) + /// ``` + /// + pub fn new() -> Self { + Rep::Subset( + Vec::new().into(), + Subsets(Vec::new()), + Some(Naming::default()), + ) } /// Whether the vector representation has names. pub fn is_named(&self) -> bool { - matches!(*self.borrow(), RepType::Subset(.., Some(_))) + matches!(self, Rep::Subset(.., Some(_))) } /// Return the names of the vector if there are any. pub fn names(&self) -> Option>> { - match self.borrow().clone() { - RepType::Subset(_, s, n) => { + match self.clone() { + Rep::Subset(_, s, n) => { if s.is_empty() { n.map(|n| n.clone().names) } else if n.is_some() { @@ -139,154 +362,421 @@ where } } - pub fn dedup_last(self) -> Self { - self.0.into_inner().dedup_last().into() + // fn materialize_inplace(&self) -> &Self { + // // TODO: Rewrite this to avoid copying unnecessarily + // let new_repr = { self.borrow().materialize() }; + // self.0.replace(new_repr); + + // self + // } + + pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { + match &self { + Rep::Subset(..) => { + let err = Error::Other("subset must have length 1".to_string()); + + let mut iter = self.clone().subset(subset).iter_subset_indices(); + let i1 = iter.next(); + + // check that subset has exactly length 1 + // assumes no indexing with NA (unwrap the option) + let i = if let Some(i) = i1 { + if iter.next().is_some() { + return err.into(); + } + i + } else { + return err.into(); + } + .unwrap(); + + self.with_inner_mut(|v| v[i] = value.clone()); + Ok(value.clone()) + } + } } - /// Constructs a new, empty `Rep` with at least the specified `capacity`. - /// Names are only include if `names` is true. 
- pub fn with_capacity(capacity: usize, names: bool) -> Self { - let naming = if names { - Some(Naming::with_capacity(capacity)) - } else { - None - }; - Self(RefCell::new(RepType::Subset( - CowObj::from(Vec::with_capacity(capacity)), - Subsets::default(), - naming, - ))) + pub fn values_ref(&self) -> IntoIterableRefValues { + match self.clone() { + Rep::Subset(values, ..) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + + IntoIterableRefValues { values, na_value: T::default(), iter } + } + } + } + + pub fn names_ref(&self) -> Option { + match self.clone() { + Rep::Subset(.., naming) => { + let iter = Box::new(self.iter_subset_indices()); + let naming = naming?; + let names = naming.names.inner_rc(); + + Some(IntoIterableRefNames { names, na_name: Character::default(), iter }) + } + } } - /// Get an `RepTypeIntoIterablePairs` which in turn can be converted into an iterator over - /// pairs of references (&name, &value). - /// - /// Directly getting an iterator is not possible due to lifetime issues. pub fn pairs_ref(&self) -> IntoIterableRefPairs { - self.0.borrow().pairs_ref() + match self.clone() { + Rep::Subset(values, _, maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + let names = maybe_naming.map(|x| x.names.inner_rc()); + + IntoIterableRefPairs { + values, + names, + na_value: T::default(), + na_name: Character::NA, + iter, + } + } + } } - /// Get an `Option>` which in turn can be converted into an iterator over - /// references to the values. - /// The `None` variant is returned if the `Rep` is not named. - /// - /// Directly getting an iterator is not possible due to lifetime issues. 
- pub fn values_ref(&self) -> IntoIterableRefValues { - self.0.borrow().values_ref() + pub fn iter_pairs(&self) -> IterablePairs { + match self.clone() { + Rep::Subset(values, _, maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let values = values.inner_rc(); + let names = maybe_naming.map(|x| x.names.inner_rc()); + + IterablePairs { values, names, iter } + } + } } - /// Get an `RepTypeIntoIterableValues` which in turn can be converted into an iterator over - /// references to the names. - /// - /// Directly getting an iterator is not possible due to lifetime issues. - pub fn names_ref(&self) -> Option { - self.0.borrow().names_ref() + pub fn iter_values(&self) -> IterableValues { + match self.clone() { + Rep::Subset(values, ..) => { + let iter = Box::new(self.iter_subset_indices()); + IterableValues { values: values.inner_rc(), iter } + } + } } - pub fn materialize(&self) -> Self { - self.borrow().materialize().into() + pub fn iter_names(&self) -> Option> { + match self.clone() { + Rep::Subset(.., maybe_naming) => { + let iter = Box::new(self.iter_subset_indices()); + let names = maybe_naming.map(|x| x.names.inner_rc())?; + + Some(IterableValues { values: names, iter }) + } + } } - /// Create an empty vector - /// - /// The primary use case for this function is to support testing, and there - /// are few expected use cases outside. It is used for creating a vector - /// of an explicit atomic type, likely to be tested with - /// `SameType::is_same_type_as`. 
- /// - /// ``` - /// use r::utils::*; - /// use r::object::Vector; - /// use r::object::OptionNA; - /// - /// let result = Vector::from(vec![1, 2, 3]); - /// let expect = Vector::from(Vec::>::new()); - /// - /// assert!(result.is_same_type_as(&expect)) - /// ``` - /// - pub fn new() -> Self { - RepType::new().into() + pub fn push_value(&self, value: T) { + self.push_named(Character::NA, value); + } + + pub fn push_named(&self, name: OptionNA, value: T) { + match self { + Rep::Subset(values, Subsets(subsets), maybe_naming) => match subsets.as_slice() { + [] => { + values.with_inner_mut(|values| values.push(value)); + if let Some(naming) = maybe_naming { + naming.push_name(name) + } + } + _ => unimplemented!(), + }, + } + } + + pub fn iter_subset_indices_exact(&self) -> ExactIterSubsetIndices { + // TODO(performance): Avoid the vector allocation + let iter = self.iter_subset_indices(); + let len = iter.count(); + let iter = self.iter_subset_indices(); + ExactIterSubsetIndices { iter, len } } + pub fn iter_subset_indices(&self) -> Box>> { + match self.clone() { + Rep::Subset(vals, subsets, maybe_naming) => { + if subsets.is_empty() { + return Box::new((0_usize..vals.len()).map(Some)); + } + + if let Some(naming) = maybe_naming { + Box::new(subsets.bind_names(naming.map).into_iter().map(|(_, y)| y)) + } else { + Box::new(subsets.into_iter().map(|(_, y)| y)) + } + } + } + } + + /// Reindex the mapping from names to indices. 
+ pub fn reindex(&mut self) { + if let Rep::Subset(.., Some(naming)) = self { + naming.map.with_inner_mut(|map| { + map.drain(); + + for (i, maybe_name) in naming.names.borrow().iter().enumerate() { + if let OptionNA::Some(name) = maybe_name { + let indices = map.entry(name.clone()).or_default(); + if !indices.contains(&i) { + indices.push(i) + } + } + } + }) + } + } + + pub fn dedup_last(self) -> Self { + match self { + Rep::Subset(values, subsets, Some(naming)) => { + naming.with_inner_mut(|map, names| { + let mut dups: Vec = map + .iter() + .flat_map(|(_, indices)| { + indices + .split_last() + .map_or(vec![], |(_, leading_dups)| leading_dups.to_vec()) + }) + .collect(); + + dups.sort(); + + values.with_inner_mut(|vs| { + for i in dups.into_iter().rev() { + vs.remove(i); + names.remove(i); + } + }); + + for (_, indices) in map.iter_mut() { + indices.drain(0..(indices.len())); + } + }); + Rep::Subset(values, subsets, Some(naming)) + } + Rep::Subset(.., None) => self, + } + } + + pub fn set_names(&self, names: CowObj>) -> Self { + match self { + Rep::Subset(v, s, _) => Rep::Subset(v.clone(), s.clone(), Option::Some(names.into())), + } + } + + /// Access a lazy copy of the internal vector data pub fn inner(&self) -> CowObj> { - self.borrow().inner() + match self.materialize() { + Rep::Subset(v, ..) => v.clone(), + } } - pub fn len(&self) -> usize { - // TODO: Only materialize when necessary - self.materialize_inplace(); - self.borrow().len() + /// Get mutable access to the internal vector through the passed closure. + pub fn with_inner_mut(&self, f: F) -> R + where + F: FnOnce(&mut Vec) -> R, + { + match self { + Rep::Subset(v, ..) => v.with_inner_mut(f), + } } /// Subsetting a Vector /// /// Introduce a new subset into the aggregate list of subset indices. 
- /// pub fn subset(&self, subset: Subset) -> Self { - (*self.borrow()).subset(subset).into() + match self { + Rep::Subset(v, Subsets(subsets), n) => { + let mut subsets = subsets.clone(); + subsets.push(subset); + Rep::Subset(v.view_mut(), Subsets(subsets), n.clone()) + } + } } + pub fn len(&self) -> usize { + match self { + Rep::Subset(v, Subsets(s), _) => match s.as_slice() { + [] => v.borrow().len(), + _ => self.values_ref().iter().count(), + }, + } + } #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn get(&self, index: usize) -> Option { - let x = self.borrow().get(index); - x.map(|x| x.into()) + /// Get a single element from a vector + /// + /// Access a single element without materializing a new vector + /// + pub fn get(&self, index: usize) -> Option> + where + T: Clone, + { + match self { + Rep::Subset(v, subsets, _) => { + let vb = v.borrow(); + let index = subsets.get_index_at(index)?; + let elem = vb.get(index)?; + Some(Rep::Subset( + vec![elem.clone()].into(), + Subsets::new(), + Option::Some(Naming::new()), + )) + } + } } - /// Change a value at the location given by `subset` to the provided `value`. - /// If the `subset` does not have length `1`, an error is returned. - pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { - // Used for `[[`-assignment. - self.0.borrow_mut().set_subset(subset, value) + /// Assignment to Subset Indices + /// + /// Assignment to a vector from another. The aggregate subsetted indices + /// are iterated over while performing the assignment. + /// + pub fn assign(&mut self, value: Rep) -> Result + where + T: Clone + Default + From, + R: Default + Clone, + { + let l_indices = self.iter_subset_indices_exact(); + let mut r_indices = value.iter_subset_indices_exact(); + + // TODO(performance): When we clone the interior data of self (to which we write) + // we don't have to perform recycling checks + // and just start iterating. 
We can always discard the result afterwards again + // Maybe implement filter_exact on (named)subsets + if r_indices.len() == 1 { + // get the element from reptype value + let index = r_indices + .next() + .expect("index should exist") + .expect("No NA for subsetting"); + let elem = value.get_inner(index).expect("element should exist"); + match (self, value) { + (Rep::Subset(lv, ls, ln), Rep::Subset(..)) => { + lv.with_inner_mut(|lvb| { + for li in l_indices { + lvb[li.unwrap()] = elem.clone().into(); + } + }); + return Ok(Rep::Subset(lv.clone(), ls.clone(), ln.clone())); + } + } + } + + if l_indices.len() != r_indices.len() { + return Err(Signal::Error(Error::NonRecyclableLengths( + l_indices.len(), + r_indices.len(), + ))); + } + + match (self, value) { + (Rep::Subset(lv, ls, ln), Rep::Subset(rv, ..)) => { + lv.with_inner_mut(|lvb| { + let rvc = rv.clone(); + let rvb = rvc.borrow(); + + for (li, ri) in l_indices.zip(r_indices) { + match (li, ri) { + (Some(li), None) => lvb[li] = T::default(), + (Some(li), Some(ri)) => lvb[li] = rvb[ri % rvb.len()].clone().into(), + _ => (), + } + } + }); + + Ok(Rep::Subset(lv.clone(), ls.clone(), ln.clone())) + } + } } - /// Push a named `value` with a given `name` onto the `Rep`. - pub fn push_named(&self, name: OptionNA, value: T) { - self.borrow().push_named(name, value) + /// Return the only value if the vector has length 1. + pub fn as_scalar(&self) -> Option { + let mut into_iter = self.values_ref(); + let mut iter = into_iter.iter(); + if let Some(x) = iter.next() { + if iter.next().is_none() { + return Some(x.clone()); + } + }; + None } - /// Assign to the vector, often with a view through a Subset. - /// An error is thrown if the lengths are not compatible. - pub fn assign(&mut self, value: Rep) -> Result + /// Materialize a Vector + /// + /// Apply subsets and clone values into a new vector. 
+ pub fn materialize(&self) -> Self where - T: From + Clone, - R: Clone + Default, + T: Clone, { - self.0 - .borrow_mut() - .assign(value.0.into_inner()) - .map(|x| x.into()) + match self { + Rep::Subset(v, subsets, naming) => { + // early exit when there is nothing to do + match subsets { + Subsets(s) => { + if s.as_slice().is_empty() { + return self.clone(); + } + } + } + + let vc = v.clone(); + let vb = vc.borrow(); + let mut res: Vec = vec![]; + let vb_len = vb.len(); + + let new_naming = Naming::new(); + + let iter = subsets.clone().into_iter().take_while(|(i, _)| i < &vb_len); + + for (_, i) in iter { + match i { + Some(i) => { + res.push(vb[i].clone()); + if let Option::Some(n) = naming { + new_naming.push_name(n.names.borrow()[i].clone()) + }; + } + // default is NA + None => { + res.push(T::default()); + // When we subset with NA, there is no name for this entry; + new_naming.push_name(OptionNA::NA); + } + } + } + + Rep::Subset(res.into(), Subsets(vec![]), Option::None) + } + } } - /// Test the mode of the internal vector type - /// - /// Internally, this is defined by the [crate::object::coercion::AtomicMode] - /// implementation of the vector's element type. - /// + pub fn is_double(&self) -> bool where T: AtomicMode, { T::is_double() } - /// See [Self::is_double] for more information + pub fn is_logical(&self) -> bool where T: AtomicMode, { T::is_logical() } - /// See [Self::is_double] for more information + pub fn is_integer(&self) -> bool where T: AtomicMode, { T::is_integer() } - /// See [Self::is_double] for more information + pub fn is_character(&self) -> bool where T: AtomicMode, @@ -294,161 +784,194 @@ where T::is_character() } - /// Convert a Vector into a vector of a specific class of internal type - /// - /// The internal type only needs to satisfy - /// [crate::object::coercion::CoercibleInto] for the `Mode`, and for the `Mode` - /// type to implement [crate::object::coercion::AtomicMode]. 
Generally, - /// this is used more directly via [Self::as_logical], [Self::as_integer], - /// [Self::as_double] and [Self::as_character], which predefine the output - /// type of the mode. - /// - /// ``` - /// use r::object::Vector; - /// use r::object::OptionNA; - /// - /// let x = Vector::from(vec![false, true, true, false]); - /// let n = x.as_double(); - /// - /// assert_eq!(n, Vector::from(vec![ - /// OptionNA::Some(0_f64), - /// OptionNA::Some(1_f64), - /// OptionNA::Some(1_f64), - /// OptionNA::Some(0_f64) - /// ])) - /// ``` - /// pub fn as_mode(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, Mode: Clone, { - Rep(RefCell::new(self.borrow().as_mode())) + match self { + Rep::Subset(v, subsets, naming) => { + let vc = v.clone(); + let vb = vc.borrow(); + + let num_vec: Vec = vb.iter().map(|i| (*i).clone().coerce_into()).collect(); + + Rep::Subset(num_vec.into(), subsets.clone(), naming.clone()) + } + } } - /// See [Self::as_mode] for more information pub fn as_logical(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } - /// See [Self::as_mode] for more information pub fn as_integer(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } - /// See [Self::as_mode] for more information pub fn as_double(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } - /// See [Self::as_mode] for more information pub fn as_character(&self) -> Rep where - T: CoercibleInto + AtomicMode, + T: CoercibleInto, { self.as_mode::() } -} -impl Default for Rep -where - T: Clone + Default, -{ - fn default() -> Self { - Rep(RefCell::new(RepType::default())) + pub fn get_inner(&self, index: usize) -> Option { + match self { + Rep::Subset(v, subsets, maybe_naming) => { + if maybe_naming.is_some() { + // TODO(NOW) + unimplemented!() + } + let vb = v.borrow(); + let index = subsets.get_index_at(index)?; + vb.get(index).cloned() + } + } } } -impl 
From> for Rep -where - T: Clone + Default, -{ - fn from(rep: Vec) -> Self { - Rep(RefCell::new(RepType::from(CowObj::from(rep)))) - } +pub struct ExactIterSubsetIndices { + iter: Box>>, + len: usize, } -impl From>> for Rep -where - T: Clone + Default, -{ - fn from(rep: CowObj>) -> Self { - Rep(RefCell::new(rep.into())) +impl ExactSizeIterator for ExactIterSubsetIndices { + fn len(&self) -> usize { + self.len } } -impl From> for Rep -where - T: Clone + Default, -{ - fn from(rep: RepType) -> Self { - Rep(RefCell::new(rep)) +impl Iterator for ExactIterSubsetIndices { + type Item = Option; + fn next(&mut self) -> Option { + self.iter.next() } } -// TODO: I think this should err when rep has length > 1 impl TryInto for Rep> where OptionNA: AtomicMode + Clone + CoercibleInto>, - T: 'static, { type Error = (); fn try_into(self) -> Result { - self.iter_pairs() - .next() - .map(|(_, x)| x) - .map_or( - Err(()), - |i| match CoercibleInto::>::coerce_into(i) { - OptionNA::Some(x) => Ok(x), - OptionNA::NA => Err(()), - }, - ) + self.get_inner(0).map_or( + Err(()), + |i| match CoercibleInto::>::coerce_into(i) { + OptionNA::Some(x) => Ok(x), + OptionNA::NA => Err(()), + }, + ) } } -impl From> for Rep { - fn from(value: Vec<(Character, Obj)>) -> Self { - Rep(RefCell::new(value.into())) +impl From> for Naming { + fn from(value: Vec) -> Self { + let naming = Naming::new(); + for k in value { + naming.push_name(k); + } + naming + } +} + +impl From>> for Rep { + fn from(value: CowObj>) -> Self { + Rep::Subset(value, Subsets::default(), Option::None) + } +} + +impl From, T)>> for Rep { + fn from(value: Vec<(Option, T)>) -> Self { + let mut names = Vec::with_capacity(value.len()); + let mut values = Vec::with_capacity(value.len()); + for (k, v) in value.into_iter() { + names.push(k.map_or(Character::NA, Character::Some)); + values.push(v) + } + let naming = Naming::from(names); + Rep::Subset(values.into(), Subsets::default(), Some(naming)) + } +} + +impl From>> for Rep { + fn 
from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) + } +} + +impl From>> for Rep { + fn from(value: Vec>) -> Self { + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } impl From> for Rep { fn from(value: Vec) -> Self { - Rep(RefCell::new(value.into())) + let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); + Rep::Subset(value.into(), Subsets(Vec::new()), Option::None) } } -impl From, T)>> for Rep { - fn from(value: Vec<(Option, T)>) -> Self { - Rep(RefCell::new(value.into())) +impl From<(Vec, Subsets)> for Rep +where + Rep: From>, + T: Clone, +{ + fn from(value: (Vec, Subsets)) -> Self { + match Self::from(value.0) { + Rep::Subset(v, ..) 
=> Rep::Subset(v, value.1, Option::None), + } } } @@ -578,7 +1101,7 @@ impl std::ops::Neg for Rep where L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, LNum: std::ops::Neg, - RepType: From>, + Rep: From>, O: Clone, { type Output = Result, Signal>; @@ -587,7 +1110,7 @@ where .iter_values() .map(|x| -(CoercibleInto::::coerce_into(x))) .collect(); - Ok(Rep(RefCell::new(result.into()))) + Ok(result.into()) } } @@ -716,7 +1239,7 @@ where .iter_values() .map(|x| !(CoercibleInto::::coerce_into(x))) .collect(); - Ok(Rep(RefCell::new(result.into()))) + Ok(result.into()) } } @@ -907,3 +1430,367 @@ where f(c1, c2) }) } +#[cfg(test)] +mod test { + use super::OptionNA::*; + use crate::object::rep::Rep; + use crate::object::{types::*, OptionNA, VecPartialCmp}; + use crate::r; + use crate::utils::SameType; + + #[test] + fn vector_add() { + let x = Rep::::from((1..=5).collect::>()); + let y = Rep::::from(vec![2, 5, 6, 2, 3]); + + let z = (x + y).unwrap(); + assert_eq!(z, Rep::from(vec![3, 7, 9, 6, 8])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_integer()); + } + + #[test] + fn vector_mul() { + let x = Rep::::from((1..=5).collect::>()); + let y = Rep::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); + + let z = (x * y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(2), NA, Some(18), NA, Some(15),])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_integer()); + } + + #[test] + fn vector_common_mul_f32_na() { + // expect that f32's do not get coerced into an OptionNA:: instead + // using std::f32::NAN as NA representation. 
+ + let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = (x * y).unwrap(); + // assert_eq!(z, Vector::from(vec![0_f32, std::f32::NAN, 1_000_f32])); + // comparing floats is error prone + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_double()); + } + + #[test] + fn vector_and() { + // expect that f32's do not get coerced into an OptionNA:: instead + // using std::f32::NAN as NA representation. + + let x = Rep::::from(vec![Some(0_f64), NA, Some(10_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = (x & y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_logical()); + } + + #[test] + fn vector_gt() { + // expect that f32's do not get coerced into an instead + // using std::f32::NAN as NA representation. + + let x = Rep::from(vec![Some(0_f64), NA, Some(10000_f64)]); + let y = Rep::::from(vec![100, 10, 1]); + + let z = x.vec_gt(y).unwrap(); + assert_eq!(z, Rep::from(vec![Some(false), NA, Some(true)])); + + let expected_type = Rep::::new(); + assert!(z.is_same_type_as(&expected_type)); + assert!(z.is_logical()); + } + + #[test] + fn test_iter_values() { + // Create values as Vec + let values = vec![1, 2, 3, 4, 5]; + + // Create Rep from values + let rep = Rep::from(values.clone()); + + // Use iter_values to get an iterator and collect values + let collected_values: Vec = rep.iter_values().collect(); + + // Expected values as Vec> + let expected_values: Vec = values.into_iter().map(OptionNA::Some).collect(); + + // Assert collected values match expected values + assert_eq!(collected_values, expected_values); + } + + #[test] + fn test_iter_names() { + // Create values with names + let values_with_names = vec![ + (Character::Some(String::from("a")), 1), + (Character::Some(String::from("b")), 2), + (Character::NA, 3), + 
(Character::Some(String::from("d")), 4), + (Character::NA, 5), + ]; + + // Create Rep from values with names + let rep = Rep::from(values_with_names.clone()); + + // Use iter_names to get an iterator + let names_iter = rep.iter_names(); + + // Ensure iter_names is Some iterator + assert!(names_iter.is_some()); + + // Collect names + let collected_names: Vec = names_iter.unwrap().collect(); + + // Expected names + let expected_names: Vec = values_with_names + .iter() + .map(|(name_opt, _)| match name_opt { + Some(name) => Character::Some(name.clone()), + Character::NA => Character::NA, + }) + .collect(); + + // Assert collected names match expected names + assert_eq!(collected_names, expected_names); + } + + use crate::object::{Obj, Vector}; + // The tests below don't test the subsetting mechanism, which is instead tested in subsets.rs + #[test] + fn iter_pairs_mixed_names() { + let x = r!(c(a = 1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_pairs() + } else { + unreachable!() + }; + + assert_eq!( + x.next().unwrap(), + (Character::Some("a".to_string()), Double::Some(1.0)) + ); + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_pairs_no_names() { + let x = r!(c(1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_pairs() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(1.0))); + assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_values() { + let x = r!(c(1, 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_values() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), Double::Some(1.0)); + assert_eq!(x.next().unwrap(), Double::Some(2.0)); + assert_eq!(x.next(), None); + } + + #[test] + fn iter_names_none() { + let x = r!(c(1, 2)).unwrap(); + + let x = if let 
Obj::Vector(Vector::Double(r)) = x { + r.iter_names() + } else { + unreachable!() + }; + + assert!(x.is_none()) + } + + #[test] + fn iter_names_some() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.iter_names().unwrap() + } else { + unreachable!() + }; + + assert_eq!(x.next().unwrap(), Character::NA); + assert_eq!(x.next().unwrap(), Character::Some("b".to_string())); + assert_eq!(x.next(), None); + } + + #[test] + fn names_ref_iter_some() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.names_ref().unwrap() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), &Character::NA); + assert_eq!(x.next().unwrap(), &Character::Some("b".to_string())); + assert_eq!(x.next(), None); + } + + #[test] + #[should_panic] + fn names_ref_iter_none() { + let x = r!(c(1, 2)).unwrap(); + + if let Obj::Vector(Vector::Double(r)) = x { + r.names_ref().unwrap() + } else { + unreachable!() + }; + } + + #[test] + fn values_ref_iter() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.values_ref() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), &Double::Some(1.0)); + assert_eq!(x.next().unwrap(), &Double::Some(2.0)); + assert_eq!(x.next(), None); + } + + #[test] + fn pairs_ref_iter() { + let x = r!(c(1, b = 2)).unwrap(); + + let mut x = if let Obj::Vector(Vector::Double(r)) = x { + r.pairs_ref() + } else { + unreachable!() + }; + + let mut x = x.iter(); + + assert_eq!(x.next().unwrap(), (&Character::NA, &Double::Some(1.0))); + assert_eq!( + x.next().unwrap(), + (&Character::Some("b".to_string()), &Double::Some(2.0)) + ); + assert_eq!(x.next(), None); + } + + use crate::error::Error; + use crate::lang::Signal; + + #[test] + fn assign_recycle_incompatible() { + let mut x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99, 99]); + let result = 
x.assign(y); + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(3, 2)) + ); + } + #[test] + fn assign_recycle_length_one() { + let x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99]); + let mut xview = x.subset(vec![0, 1].into()); + let _ = xview.assign(y).unwrap(); + let result_vec: Vec<_> = x.iter_values().collect(); + assert_eq!(result_vec, vec![Some(99), Some(99), Some(3)]) + } + #[test] + fn non_recyclable_lengths_3_2() { + let x = Rep::::from(vec![1, 2, 3]); + let y = Rep::::from(vec![99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(3, 2)) + ); + } + #[test] + fn non_recyclable_lengths_4_2() { + let x = Rep::::from(vec![1, 2, 3, 4]); + let y = Rep::::from(vec![99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(4, 2)) + ); + } + #[test] + fn non_recyclable_lengths_2_3() { + let x = Rep::::from(vec![1, 2]); + let y = Rep::::from(vec![99, 99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(2, 3)) + ); + } + #[test] + fn non_recyclable_lengths_2_4() { + let x = Rep::::from(vec![1, 2]); + let y = Rep::::from(vec![99, 99, 99, 99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(2, 4)) + ); + } + #[test] + fn non_recyclable_lengths_0_1() { + let x = Rep::::from(Vec::::new()); + let y = Rep::::from(vec![99]); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(0, 1)) + ); + } + #[test] + fn non_recyclable_lengths_1_0() { + let x = Rep::::from(vec![99]); + let y = Rep::::from(Vec::::new()); + let result = x + y; + assert_eq!( + result.unwrap_err(), + Signal::Error(Error::NonRecyclableLengths(1, 0)) + ); + } +} diff --git a/src/object/vector/reptype.rs b/src/object/vector/reptype.rs deleted file mode 100644 index 83f841a..0000000 --- 
a/src/object/vector/reptype.rs +++ /dev/null @@ -1,1819 +0,0 @@ -use std::fmt::Debug; -use std::fmt::Display; -use std::iter::repeat; - -use super::coercion::{AtomicMode, CoercibleInto, CommonCmp, CommonNum, MinimallyNumeric}; -use super::subset::Subset; -use super::subsets::Subsets; -use super::types::*; -use super::{OptionNA, Pow, VecPartialCmp}; -use crate::error::Error; -use crate::lang::Signal; -use crate::object::{CowObj, ViewMut}; -use hashbrown::HashMap; -use std::cell::RefCell; -use std::rc::Rc; - -#[derive(Debug, Clone, PartialEq, Default)] -pub struct Naming { - // TODO: change this to usize and not Vec (after making names unique) - pub map: CowObj>>, - pub names: CowObj>>, -} - -impl Naming { - /// Create an empty `Naming` - pub fn new() -> Self { - Naming::default() - } - - /// Create a naming with the given `capacity`. - pub fn with_capacity(capacity: usize) -> Self { - Self { - map: HashMap::>::with_capacity(capacity).into(), - names: CowObj::from(Vec::::with_capacity(capacity)), - } - } - - /// Push a new name onto the `Naming`. - pub fn push_name(&self, name: OptionNA) { - self.names.with_inner_mut(|v| v.push(name.clone())); - if let OptionNA::Some(name) = name { - let n = self.names.len() - 1; - self.map.with_inner_mut(|map| { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&n) { - indices.push(n); - }; - }); - }; - } - - /// Get mutable access to the internal data (map and names vector) via the passed closure. 
- pub fn with_inner_mut(&self, f: F) -> R - where - F: FnOnce(&mut HashMap>, &mut Vec>) -> R, - { - self.map - .with_inner_mut(|map| self.names.with_inner_mut(|names| f(map, names))) - } -} - -impl From> for RepType { - fn from(value: Vec<(Character, T)>) -> Self { - let mut names = Vec::with_capacity(value.len()); - let mut values = Vec::with_capacity(value.len()); - for (k, v) in value { - names.push(k); - values.push(v); - } - - RepType::Subset( - CowObj::new(Rc::new(RefCell::new(Rc::new(values)))), - Subsets::default(), - Option::Some(Naming::from(names)), - ) - } -} - -impl From>> for Naming { - fn from(value: CowObj>) -> Self { - let mut map: HashMap> = HashMap::new(); - - value.iter().enumerate().for_each(|(i, maybe_name)| { - if let OptionNA::Some(name) = maybe_name { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&i) { - indices.push(i); - }; - }; - }); - - Self { map: map.into(), names: value } - } -} - -/// Vector -#[derive(Debug, PartialEq)] -pub enum RepType { - // Vector::Subset encompasses a "raw" vector (no subsetting) - Subset(CowObj>, Subsets, Option), - // Iterator includes things like ranges 1:Inf, and lazily computed values - // Iter(Box>) -} - -impl Clone for RepType { - fn clone(&self) -> Self { - match self { - RepType::Subset(v, s, n) => RepType::Subset(v.clone(), s.clone(), n.clone()), - } - } -} - -impl Default for RepType { - fn default() -> Self { - Self::new() - } -} - -impl RepType { - /// Get a cloned version of the inner value. - /// This is used for accessing inner values like `list(1)[[1]]`. - pub fn try_get_inner(&self, subset: Subset) -> Result { - #[allow(clippy::map_clone)] - self.try_get_inner_mut(subset).map(|x| x.clone()) - } - /// Retrieve the internal data as a mutable view. - /// This is important for lists for things like `l$a[1:2] = c(10, 11)` - pub fn try_get_inner_mut(&self, subset: Subset) -> Result { - let new_subset = self.subset(subset); - match new_subset { - RepType::Subset(..) 
=> { - let mut iter = new_subset.iter_subset_indices(); - - if let Some(i) = iter.next() { - if iter.next().is_some() { - return Error::Other("subset has length > 1".to_string()).into(); - } - - // TODO: subsetting with NA should not be possible. - let i = i.unwrap(); - - Ok(self.with_inner_mut(|values| values[i].view_mut())) - } else { - Error::Other("subset is empty".to_string()).into() - } - } - } - } -} - -pub struct IntoIterableRefNames { - names: Rc>, - na_name: Character, - iter: Box>>, -} - -pub struct RepTypeIterableNames<'a> { - names: &'a [Character], - na_name: &'a Character, - iter: &'a mut Box>>, -} - -impl IntoIterableRefNames { - pub fn iter(&mut self) -> RepTypeIterableNames<'_> { - let names = &self.names[..]; - RepTypeIterableNames { - names, - na_name: &self.na_name, - iter: &mut self.iter, - } - } -} - -impl<'a> Iterator for RepTypeIterableNames<'a> { - type Item = &'a Character; - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - Some(&self.names[i]) - } else { - Some(self.na_name) - } - } -} - -pub struct IntoIterableRefValues { - values: Rc>, - na_value: T, - iter: Box>>, -} - -impl IntoIterableRefValues { - pub fn iter(&mut self) -> IterableRefValues<'_, T> { - let values = &self.values[..]; - - IterableRefValues { - values, - na_value: &self.na_value, - iter: &mut self.iter, - } - } -} - -pub struct IntoIterableRefPairs { - values: Rc>, - names: Option>>, - na_value: T, - na_name: Character, - iter: Box>>, -} - -impl IntoIterableRefPairs { - pub fn iter(&mut self) -> IterableRefPairs<'_, T> { - let values = &self.values[..]; - - let names = self.names.as_ref().map(|names| &names[..]); - - IterableRefPairs { - values, - names, - na_value: &self.na_value, - na_name: &self.na_name, - iter: &mut self.iter, - } - } -} - -pub struct IterableRefValues<'a, T: Clone> { - values: &'a [T], - na_value: &'a T, - iter: &'a mut Box>>, -} - -pub struct IterableRefPairs<'a, T: Clone> { - values: &'a [T], - names: Option<&'a 
[Character]>, - na_value: &'a T, - na_name: &'a Character, - iter: &'a mut Box>>, -} - -impl<'a, T: Clone> Iterator for IterableRefPairs<'a, T> { - type Item = (&'a Character, &'a T); - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - if let Some(names) = self.names { - Option::Some((&names[i], &self.values[i])) - } else { - Option::Some((self.na_name, &self.values[i])) - } - } else { - Option::Some((self.na_name, self.na_value)) - } - } -} - -impl<'a, T: Clone> Iterator for IterableRefValues<'a, T> { - type Item = &'a T; - - fn next(&mut self) -> Option { - if let Some(i) = self.iter.next()? { - Some(&self.values[i]) - } else { - Some(self.na_value) - } - } -} - -impl ViewMut for RepType { - fn view_mut(&self) -> Self { - match self { - RepType::Subset(v, s, n) => RepType::Subset(v.view_mut(), s.clone(), n.clone()), - } - } -} - -pub struct IterableValues { - values: Rc>, - iter: Box>>, -} - -impl Iterator for IterableValues { - type Item = T; - fn next(&mut self) -> Option { - // FIXME: Already assumes no indexing with NA - let i = self.iter.next()?.unwrap(); - Some(self.values[i].clone()) - } -} - -pub struct IterablePairs { - values: Rc>, - names: Option>>, - iter: Box>>, -} - -impl Iterator for IterablePairs { - type Item = (Character, T); - fn next(&mut self) -> Option { - // FIXME: Already assumes no indexing with NA - let i = self.iter.next()?.unwrap(); - let value = self.values[i].clone(); - let name = if let Some(names) = &self.names { - names[i].clone() - } else { - Character::NA - }; - Some((name, value)) - } -} - -impl RepType { - /// Create an empty vector - /// - /// The primary use case for this function is to support testing, and there - /// are few expected use cases outside. It is used for creating a vector - /// of an explicit atomic type, likely to be tested with - /// `SameType::is_same_type_as`. 
- /// - /// ``` - /// use r::utils::*; - /// use r::object::Vector; - /// use r::object::OptionNA; - /// - /// let result = Vector::from(vec![1, 2, 3]); - /// let expect = Vector::from(Vec::>::new()); - /// - /// assert!(result.is_same_type_as(&expect)) - /// ``` - /// - pub fn new() -> Self { - RepType::Subset( - Vec::new().into(), - Subsets(Vec::new()), - Some(Naming::default()), - ) - } - - /// Whether the vector representation has names. - pub fn is_named(&self) -> bool { - matches!(self, RepType::Subset(.., Some(_))) - } - - /// Return the names of the vector if there are any. - pub fn names(&self) -> Option>> { - match self.clone() { - RepType::Subset(_, s, n) => { - if s.is_empty() { - n.map(|n| n.clone().names) - } else if n.is_some() { - Some( - self.iter_names() - .expect("checked that names exist") - .collect::>() - .into(), - ) - } else { - None - } - } - } - } - - // fn materialize_inplace(&self) -> &Self { - // // TODO: Rewrite this to avoid copying unnecessarily - // let new_repr = { self.borrow().materialize() }; - // self.0.replace(new_repr); - - // self - // } - - pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { - match &self { - RepType::Subset(..) => { - let err = Error::Other("subset must have length 1".to_string()); - - let mut iter = self.clone().subset(subset).iter_subset_indices(); - let i1 = iter.next(); - - // check that subset has exactly length 1 - // assumes no indexing with NA (unwrap the option) - let i = if let Some(i) = i1 { - if iter.next().is_some() { - return err.into(); - } - i - } else { - return err.into(); - } - .unwrap(); - - self.with_inner_mut(|v| v[i] = value.clone()); - Ok(value.clone()) - } - } - } - - pub fn values_ref(&self) -> IntoIterableRefValues { - match self.clone() { - RepType::Subset(values, ..) 
=> { - let iter = Box::new(self.iter_subset_indices()); - // for x in iter { - // dbg!(&x); - // } - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - - IntoIterableRefValues { values, na_value: T::default(), iter } - } - } - } - - pub fn names_ref(&self) -> Option { - match self.clone() { - RepType::Subset(.., naming) => { - let iter = Box::new(self.iter_subset_indices()); - let naming = naming?; - let names = naming.names.inner_rc(); - - Some(IntoIterableRefNames { names, na_name: Character::default(), iter }) - } - } - } - - pub fn pairs_ref(&self) -> IntoIterableRefPairs { - match self.clone() { - RepType::Subset(values, _, maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - let names = maybe_naming.map(|x| x.names.inner_rc()); - - IntoIterableRefPairs { - values, - names, - na_value: T::default(), - na_name: Character::NA, - iter, - } - } - } - } - - pub fn iter_pairs(&self) -> IterablePairs { - match self.clone() { - RepType::Subset(values, _, maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let values = values.inner_rc(); - let names = maybe_naming.map(|x| x.names.inner_rc()); - - IterablePairs { values, names, iter } - } - } - } - - pub fn iter_values(&self) -> IterableValues { - match self.clone() { - RepType::Subset(values, ..) 
=> { - let iter = Box::new(self.iter_subset_indices()); - IterableValues { values: values.inner_rc(), iter } - } - } - } - - pub fn iter_names(&self) -> Option> { - match self.clone() { - RepType::Subset(.., maybe_naming) => { - let iter = Box::new(self.iter_subset_indices()); - let names = maybe_naming.map(|x| x.names.inner_rc())?; - - Some(IterableValues { values: names, iter }) - } - } - } - - pub fn push_value(&self, value: T) { - self.push_named(Character::NA, value); - } - - pub fn push_named(&self, name: OptionNA, value: T) { - match self { - RepType::Subset(values, Subsets(subsets), maybe_naming) => match subsets.as_slice() { - [] => { - values.with_inner_mut(|values| values.push(value)); - if let Some(naming) = maybe_naming { - naming.push_name(name) - } - } - _ => unimplemented!(), - }, - } - } - - pub fn iter_subset_indices_exact(&self) -> ExactIterSubsetIndices { - // TODO(performance): Avoid the vector allocation - let iter = self.iter_subset_indices(); - let len = iter.count(); - let iter = self.iter_subset_indices(); - ExactIterSubsetIndices { iter, len } - } - - pub fn iter_subset_indices(&self) -> Box>> { - match self.clone() { - RepType::Subset(vals, subsets, maybe_naming) => { - if subsets.is_empty() { - return Box::new((0_usize..vals.len()).map(Some)); - } - - if let Some(naming) = maybe_naming { - Box::new(subsets.bind_names(naming.map).into_iter().map(|(_, y)| y)) - } else { - Box::new(subsets.into_iter().map(|(_, y)| y)) - } - } - } - } - - /// Reindex the mapping from names to indices. 
- pub fn reindex(&mut self) { - if let RepType::Subset(.., Some(naming)) = self { - naming.map.with_inner_mut(|map| { - map.drain(); - - for (i, maybe_name) in naming.names.borrow().iter().enumerate() { - if let OptionNA::Some(name) = maybe_name { - let indices = map.entry(name.clone()).or_default(); - if !indices.contains(&i) { - indices.push(i) - } - } - } - }) - } - } - - pub fn dedup_last(self) -> Self { - match self { - RepType::Subset(values, subsets, Some(naming)) => { - naming.with_inner_mut(|map, names| { - let mut dups: Vec = map - .iter() - .flat_map(|(_, indices)| { - indices - .split_last() - .map_or(vec![], |(_, leading_dups)| leading_dups.to_vec()) - }) - .collect(); - - dups.sort(); - - values.with_inner_mut(|vs| { - for i in dups.into_iter().rev() { - vs.remove(i); - names.remove(i); - } - }); - - for (_, indices) in map.iter_mut() { - indices.drain(0..(indices.len())); - } - }); - RepType::Subset(values, subsets, Some(naming)) - } - RepType::Subset(.., None) => self, - } - } - - pub fn set_names(&self, names: CowObj>) -> Self { - match self { - RepType::Subset(v, s, _) => { - RepType::Subset(v.clone(), s.clone(), Option::Some(names.into())) - } - } - } - - /// Access a lazy copy of the internal vector data - pub fn inner(&self) -> CowObj> { - match self.materialize() { - RepType::Subset(v, ..) => v.clone(), - } - } - - /// Get mutable access to the internal vector through the passed closure. - pub fn with_inner_mut(&self, f: F) -> R - where - F: FnOnce(&mut Vec) -> R, - { - match self { - RepType::Subset(v, ..) => v.with_inner_mut(f), - } - } - - /// Subsetting a Vector - /// - /// Introduce a new subset into the aggregate list of subset indices. 
- pub fn subset(&self, subset: Subset) -> Self { - match self { - RepType::Subset(v, Subsets(subsets), n) => { - let mut subsets = subsets.clone(); - subsets.push(subset); - RepType::Subset(v.view_mut(), Subsets(subsets), n.clone()) - } - } - } - - pub fn len(&self) -> usize { - match self { - RepType::Subset(v, Subsets(s), _) => match s.as_slice() { - [] => v.borrow().len(), - _ => self.values_ref().iter().count(), - }, - } - } - #[must_use] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Get a single element from a vector - /// - /// Access a single element without materializing a new vector - /// - pub fn get(&self, index: usize) -> Option> - where - T: Clone, - { - match self { - RepType::Subset(v, subsets, _) => { - let vb = v.borrow(); - let index = subsets.get_index_at(index)?; - let elem = vb.get(index)?; - Some(RepType::Subset( - vec![elem.clone()].into(), - Subsets::new(), - Option::Some(Naming::new()), - )) - } - } - } - - /// Assignment to Subset Indices - /// - /// Assignment to a vector from another. The aggregate subsetted indices - /// are iterated over while performing the assignment. - /// - pub fn assign(&mut self, value: RepType) -> Result - where - T: Clone + Default + From, - R: Default + Clone, - { - let l_indices = self.iter_subset_indices_exact(); - let mut r_indices = value.iter_subset_indices_exact(); - - // TODO(performance): When we clone the interior data of self (to which we write) - // we don't have to perform recycling checks - // and just start iterating. 
We can always discard the result afterwards again - // Maybe implement filter_exact on (named)subsets - if r_indices.len() == 1 { - // get the element from reptype value - let index = r_indices - .next() - .expect("index should exist") - .expect("No NA for subsetting"); - let elem = value.get_inner(index).expect("element should exist"); - match (self, value) { - (RepType::Subset(lv, ls, ln), RepType::Subset(..)) => { - lv.with_inner_mut(|lvb| { - for li in l_indices { - lvb[li.unwrap()] = elem.clone().into(); - } - }); - return Ok(RepType::Subset(lv.clone(), ls.clone(), ln.clone())); - } - } - } - - if l_indices.len() != r_indices.len() { - return Err(Signal::Error(Error::NonRecyclableLengths( - l_indices.len(), - r_indices.len(), - ))); - } - - match (self, value) { - (RepType::Subset(lv, ls, ln), RepType::Subset(rv, ..)) => { - lv.with_inner_mut(|lvb| { - let rvc = rv.clone(); - let rvb = rvc.borrow(); - - for (li, ri) in l_indices.zip(r_indices) { - match (li, ri) { - (Some(li), None) => lvb[li] = T::default(), - (Some(li), Some(ri)) => lvb[li] = rvb[ri % rvb.len()].clone().into(), - _ => (), - } - } - }); - - Ok(RepType::Subset(lv.clone(), ls.clone(), ln.clone())) - } - } - } - - // implement materialize_inplace - fn materialize_inplace(&mut self) { - *self = self.materialize(); - } - - /// Return the only value if the vector has length 1. - pub fn as_scalar(&self) -> Option { - let mut into_iter = self.values_ref(); - let mut iter = into_iter.iter(); - if let Some(x) = iter.next() { - if iter.next().is_none() { - return Some(x.clone()); - } - }; - None - } - - /// Materialize a Vector - /// - /// Apply subsets and clone values into a new vector. 
- pub fn materialize(&self) -> Self - where - T: Clone, - { - match self { - RepType::Subset(v, subsets, naming) => { - // early exit when there is nothing to do - match subsets { - Subsets(s) => { - if s.as_slice().is_empty() { - return self.clone(); - } - } - } - - let vc = v.clone(); - let vb = vc.borrow(); - let mut res: Vec = vec![]; - let vb_len = vb.len(); - - let new_naming = Naming::new(); - - let iter = subsets.clone().into_iter().take_while(|(i, _)| i < &vb_len); - - for (_, i) in iter { - match i { - Some(i) => { - res.push(vb[i].clone()); - if let Option::Some(n) = naming { - new_naming.push_name(n.names.borrow()[i].clone()) - }; - } - // default is NA - None => { - res.push(T::default()); - // When we subset with NA, there is no name for this entry; - new_naming.push_name(OptionNA::NA); - } - } - } - - RepType::Subset(res.into(), Subsets(vec![]), Option::None) - } - } - } - - pub fn is_double(&self) -> bool - where - T: AtomicMode, - { - T::is_double() - } - - pub fn is_logical(&self) -> bool - where - T: AtomicMode, - { - T::is_logical() - } - - pub fn is_integer(&self) -> bool - where - T: AtomicMode, - { - T::is_integer() - } - - pub fn is_character(&self) -> bool - where - T: AtomicMode, - { - T::is_character() - } - - pub fn as_mode(&self) -> RepType - where - T: CoercibleInto, - Mode: Clone, - { - match self { - RepType::Subset(v, subsets, naming) => { - let vc = v.clone(); - let vb = vc.borrow(); - - let num_vec: Vec = vb.iter().map(|i| (*i).clone().coerce_into()).collect(); - - RepType::Subset(num_vec.into(), subsets.clone(), naming.clone()) - } - } - } - - pub fn as_logical(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_integer(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_double(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn as_character(&self) -> RepType - where - T: CoercibleInto, - { - self.as_mode::() - } - - pub fn 
get_inner(&self, index: usize) -> Option { - match self { - RepType::Subset(v, subsets, maybe_naming) => { - if maybe_naming.is_some() { - // TODO(NOW) - unimplemented!() - } - let vb = v.borrow(); - let index = subsets.get_index_at(index)?; - vb.get(index).cloned() - } - } - } -} - -pub struct ExactIterSubsetIndices { - iter: Box>>, - len: usize, -} - -impl ExactSizeIterator for ExactIterSubsetIndices { - fn len(&self) -> usize { - self.len - } -} - -impl Iterator for ExactIterSubsetIndices { - type Item = Option; - fn next(&mut self) -> Option { - self.iter.next() - } -} - -impl TryInto for RepType> -where - OptionNA: AtomicMode + Clone + CoercibleInto>, -{ - type Error = (); - fn try_into(self) -> Result { - self.get_inner(0).map_or( - Err(()), - |i| match CoercibleInto::>::coerce_into(i) { - OptionNA::Some(x) => Ok(x), - OptionNA::NA => Err(()), - }, - ) - } -} - -impl From> for Naming { - fn from(value: Vec) -> Self { - let naming = Naming::new(); - for k in value { - naming.push_name(k); - } - naming - } -} - -impl From>> for RepType { - fn from(value: CowObj>) -> Self { - RepType::Subset(value, Subsets::default(), Option::None) - } -} - -impl From, T)>> for RepType { - fn from(value: Vec<(Option, T)>) -> Self { - let mut names = Vec::with_capacity(value.len()); - let mut values = Vec::with_capacity(value.len()); - for (k, v) in value.into_iter() { - names.push(k.map_or(Character::NA, Character::Some)); - values.push(v) - } - let naming = Naming::from(names); - RepType::Subset(values.into(), Subsets::default(), Some(naming)) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl 
From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From>> for RepType { - fn from(value: Vec>) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From> for RepType { - fn from(value: Vec) -> Self { - let value: Vec<_> = value.into_iter().map(|i| i.coerce_into()).collect(); - RepType::Subset(value.into(), Subsets(Vec::new()), Option::None) - } -} - -impl From<(Vec, Subsets)> for RepType -where - RepType: From>, - T: Clone, -{ - fn from(value: (Vec, Subsets)) -> Self { - match Self::from(value.0) { - RepType::Subset(v, ..) 
=> RepType::Subset(v, value.1, Option::None), - } - } -} - -impl Display for RepType -where - T: AtomicMode + Debug + Default + Clone, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let n = self.len(); - if n == 0 { - if self.is_double() { - return write!(f, "double(0)"); - } - if self.is_integer() { - return write!(f, "integer(0)"); - } - if self.is_logical() { - return write!(f, "logical(0)"); - } - if self.is_character() { - return write!(f, "character(0)"); - } - } - let nlen = format!("{}", n).len(); - // calculate how many characters are printed per value. - // The iteraror yields the characters needed for a specific item. - fn element_width(iter: impl Iterator) -> usize { - let mut elt_width = 1_usize; - for (i, width) in iter.enumerate() { - elt_width = std::cmp::max(elt_width, width); - if elt_width * (i + 1) >= 20 * 80 { - break; - } - } - elt_width - } - - if !self.is_named() { - let elt_width = - element_width(self.values_ref().iter().map(|x| format!("{:?}", x).len())); - - let mut values_ref = self.values_ref(); - let x_strs = values_ref.iter().map(|xi| format!("{:?}", xi)); - - let mut col = 0; - let gutterlen = 2 + nlen + 1; - - // hard coded max print & console width - // we print at most 20 rows - let maxprint = 20 * ((80 - gutterlen) / (elt_width + 1)); - - x_strs - .take(maxprint) - .enumerate() - .try_for_each(|(i, x_str)| { - if i == 0 { - col = gutterlen + elt_width; - write!( - f, - "{:>3$}[{}] {:>4$}", - "", - i + 1, - x_str, - nlen - 1, - elt_width - ) - } else if col + 1 + elt_width > 80 { - col = gutterlen + elt_width; - let i_str = format!("{}", i + 1); - let gutter = nlen - i_str.len(); - write!( - f, - "\n{:>3$}[{}] {:>4$}", - "", i_str, x_str, gutter, elt_width - ) - } else { - col += 1 + elt_width; - write!(f, " {:>1$}", x_str, elt_width) - } - })?; - - if n > maxprint { - write!(f, "\n[ omitting {} entries ]", n - maxprint)?; - } - } else { - let elt_width = element_width( - self.pairs_ref() - .iter() - 
.map(|x| std::cmp::max(format!("{:}", x.0).len(), format!("{:?}", x.1).len())), - ); - let mut values_ref = self.values_ref(); - let mut names_ref = self - .names_ref() - .expect("already checked existence of names"); - - let mut values_strs = values_ref.iter().map(|x| format!("{:?}", x)); - let mut names_strs = names_ref.iter().map(|x| format!("{:}", x)); - - // hard coded max print & console width - // we print at most 20 rows - let elts_per_line = 80 / (elt_width + 1); - - 'lines: for _ in 1..=20 { - for _ in 1..=elts_per_line { - if let Some(name) = names_strs.next() { - write!(f, "{:}{:>2$}", name, " ", elt_width - name.len())?; - } else { - break; - } - } - writeln!(f)?; - for _ in 1..=elts_per_line { - if let Some(value) = values_strs.next() { - write!(f, "{:}{:>2$}", value, " ", elt_width - value.len())?; - } else { - break 'lines; - } - } - writeln!(f)?; - } - } - Ok(()) - } -} - -impl std::ops::Neg for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - LNum: std::ops::Neg, - RepType: From>, - O: Clone, -{ - type Output = Result, Signal>; - fn neg(self) -> Self::Output { - let result: Vec = self - .iter_values() - .map(|x| -(CoercibleInto::::coerce_into(x))) - .collect(); - Ok(result.into()) - } -} - -impl std::ops::Add> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - C: Clone + std::ops::Add + Default, - RepType: From>, - O: Clone + Default, -{ - type Output = Result, Signal>; - fn add(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| x + y) - } -} - -impl std::ops::Sub> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - C: Clone + std::ops::Sub + Default, - RepType: From>, - O: Clone + Default, -{ - type Output = Result, 
Signal>; - fn sub(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| x - y) - } -} - -impl std::ops::Mul> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - C: Clone + std::ops::Mul + Default, - RepType: From>, - O: Clone + Default, -{ - type Output = Result, Signal>; - fn mul(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| x * y) - } -} - -impl std::ops::Div> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - C: Clone + std::ops::Div + Default, - RepType: From>, - O: Clone + Default, -{ - type Output = Result, Signal>; - fn div(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| x / y) - } -} - -impl std::ops::Rem> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - C: Clone + std::ops::Rem + Default, - RepType: From>, - O: Clone + Default, -{ - type Output = Result, Signal>; - fn rem(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| x % y) - } -} - -impl Pow> for RepType -where - L: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - R: AtomicMode + Default + Clone + MinimallyNumeric + CoercibleInto, - (LNum, RNum): CommonNum, - O: Pow, - RepType: From>, - O: Default, - L: Clone, - R: Clone, - O: Clone, -{ - type Output = Result, Signal>; - fn power(self, rhs: RepType) -> Self::Output { - try_binary_num_op(self, rhs, |x, y| Pow::power(x, y)) - } -} - -impl std::ops::BitOr> for RepType -where - L: AtomicMode + Default + Clone + CoercibleInto, - R: AtomicMode + Default + Clone + CoercibleInto, -{ - type Output = Result, Signal>; - fn bitor(self, 
rhs: RepType) -> Self::Output { - try_binary_lgl_op(self, rhs, |x, y| x | y) - } -} - -impl std::ops::BitAnd> for RepType -where - L: AtomicMode + Default + Clone + CoercibleInto, - R: AtomicMode + Default + Clone + CoercibleInto, -{ - type Output = Result, Signal>; - fn bitand(self, rhs: RepType) -> Self::Output { - try_binary_lgl_op(self, rhs, |x, y| x & y) - } -} - -impl std::ops::Not for RepType -where - L: AtomicMode + Default + Clone + CoercibleInto, -{ - type Output = Result, Signal>; - fn not(self) -> Self::Output { - let result: Vec = self - .iter_values() - .map(|x| !(CoercibleInto::::coerce_into(x))) - .collect(); - Ok(result.into()) - } -} - -impl VecPartialCmp> for RepType -where - L: AtomicMode + Default + Clone + CoercibleInto + Clone, - R: AtomicMode + Default + Clone + CoercibleInto + Clone, - (L, R): CommonCmp, - C: PartialOrd + Clone + Default, -{ - type Output = Result, Signal>; - - fn vec_gt(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - Some(Greater) => OptionNA::Some(true), - Some(_) => OptionNA::Some(false), - None => OptionNA::NA, - }) - } - - fn vec_gte(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - Some(Greater | Equal) => OptionNA::Some(true), - Some(_) => OptionNA::Some(false), - None => OptionNA::NA, - }) - } - - fn vec_lt(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - Some(Less) => OptionNA::Some(true), - Some(_) => OptionNA::Some(false), - None => OptionNA::NA, - }) - } - - fn vec_lte(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - Some(Less | Equal) => OptionNA::Some(true), - Some(_) => OptionNA::Some(false), - None => OptionNA::NA, - }) - } - - fn vec_eq(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - 
Some(Equal) => OptionNA::Some(true), - Some(_) => OptionNA::Some(false), - None => OptionNA::NA, - }) - } - - fn vec_neq(self, rhs: RepType) -> Self::Output { - use std::cmp::Ordering::*; - try_binary_cmp_op(self, rhs, |i| match i { - Some(Equal) => OptionNA::Some(false), - Some(_) => OptionNA::Some(true), - None => OptionNA::NA, - }) - } -} - -/// This function applies a function `g` to pairs from lhs and rhs. -/// The function returns an error when the lengths are not compatible. -fn try_recycle_then( - lhs: RepType, - rhs: RepType, - g: F, -) -> Result, Signal> -where - L: Clone + Default, - R: Clone + Default, - RepType: From>, - O: Clone + Default, - A: Clone, - F: Fn(L, R) -> O, -{ - match (lhs.as_scalar(), rhs.as_scalar()) { - (Some(l), Some(r)) => { - let result: Vec = vec![g(l, r)]; - Ok(RepType::from(result)) - } - (Some(l), None) => { - let result: Vec = repeat(l) - .zip(rhs.iter_values()) - .map(|(l, r)| g(l, r)) - .collect(); - if result.is_empty() { - return Err(Signal::Error(Error::NonRecyclableLengths(1, 0))); - } - Ok(RepType::from(result)) - } - (None, Some(r)) => { - let result: Vec = lhs - .iter_values() - .zip(repeat(r)) - .map(|(l, r)| g(l, r)) - .collect(); - if result.is_empty() { - return Err(Signal::Error(Error::NonRecyclableLengths(0, 1))); - } - Ok(RepType::from(result)) - } - (None, None) => { - let mut lc = lhs.iter_values(); - let mut rc = rhs.iter_values(); - - let max_size = std::cmp::max(lc.size_hint().0, rc.size_hint().0); - - let mut result: Vec = Vec::with_capacity(max_size); - - loop { - match (lc.next(), rc.next()) { - (Some(l), Some(r)) => result.push(g(l, r)), - (Some(_), None) => { - return Err(Signal::Error(Error::NonRecyclableLengths( - result.len() + 1 + lc.count(), - result.len(), - ))); - } - (None, Some(_)) => { - return Err(Signal::Error(Error::NonRecyclableLengths( - result.len(), - result.len() + 1 + rc.count(), - ))); - } - (None, None) => return Ok(RepType::from(result)), - } - } - } - } -} - -fn 
try_binary_num_op( - lhs: RepType, - rhs: RepType, - f: F, -) -> Result, Signal> -where - L: Default + Clone + MinimallyNumeric + CoercibleInto, - R: Default + Clone + MinimallyNumeric + CoercibleInto, - C: Default + Clone, - (LNum, RNum): CommonNum, - RepType: From>, - O: Clone + Default, - F: Fn(C, C) -> O, - C: Clone + Default, -{ - try_recycle_then(lhs, rhs, |x, y| { - let (c1, c2) = ( - CoercibleInto::::coerce_into(x), - CoercibleInto::::coerce_into(y), - ) - .into_common(); - f(c1, c2) - }) -} - -// FIXME(performance): equality with references for characters -fn try_binary_cmp_op( - lhs: RepType, - rhs: RepType, - f: F, -) -> Result, Signal> -where - L: AtomicMode + Default + Clone + CoercibleInto + Clone, - R: AtomicMode + Default + Clone + CoercibleInto + Clone, - (L, R): CommonCmp, - C: PartialOrd + Clone + Default, - F: Fn(Option) -> Logical, -{ - try_recycle_then(lhs, rhs, |x, y| { - let c1: C = x.coerce_into(); - let c2: C = y.coerce_into(); - let ordering = c1.partial_cmp(&c2); - f(ordering) - }) -} - -pub fn try_binary_lgl_op( - lhs: RepType, - rhs: RepType, - f: F, -) -> Result, Signal> -where - L: AtomicMode + Default + Clone + CoercibleInto, - R: AtomicMode + Default + Clone + CoercibleInto, - F: Fn(Logical, Logical) -> Logical, -{ - try_recycle_then(lhs, rhs, |x, y| { - let (c1, c2) = ( - CoercibleInto::::coerce_into(x), - CoercibleInto::::coerce_into(y), - ); - f(c1, c2) - }) -} -#[cfg(test)] -mod test { - use super::OptionNA::*; - use crate::object::reptype::RepType; - use crate::object::{types::*, OptionNA, VecPartialCmp}; - use crate::r; - use crate::utils::SameType; - - #[test] - fn vector_add() { - let x = RepType::::from((1..=5).collect::>()); - let y = RepType::::from(vec![2, 5, 6, 2, 3]); - - let z = (x + y).unwrap(); - assert_eq!(z, RepType::from(vec![3, 7, 9, 6, 8])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_integer()); - } - - #[test] - fn vector_mul() { - let x = 
RepType::::from((1..=5).collect::>()); - let y = RepType::::from(vec![Some(2), NA, Some(6), NA, Some(3)]); - - let z = (x * y).unwrap(); - assert_eq!(z, RepType::from(vec![Some(2), NA, Some(18), NA, Some(15),])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_integer()); - } - - #[test] - fn vector_common_mul_f32_na() { - // expect that f32's do not get coerced into an OptionNA:: instead - // using std::f32::NAN as NA representation. - - let x = RepType::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = RepType::::from(vec![100, 10, 1]); - - let z = (x * y).unwrap(); - // assert_eq!(z, Vector::from(vec![0_f32, std::f32::NAN, 1_000_f32])); - // comparing floats is error prone - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_double()); - } - - #[test] - fn vector_and() { - // expect that f32's do not get coerced into an OptionNA:: instead - // using std::f32::NAN as NA representation. - - let x = RepType::::from(vec![Some(0_f64), NA, Some(10_f64)]); - let y = RepType::::from(vec![100, 10, 1]); - - let z = (x & y).unwrap(); - assert_eq!(z, RepType::from(vec![Some(false), NA, Some(true)])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_logical()); - } - - #[test] - fn vector_gt() { - // expect that f32's do not get coerced into an instead - // using std::f32::NAN as NA representation. 
- - let x = RepType::from(vec![Some(0_f64), NA, Some(10000_f64)]); - let y = RepType::::from(vec![100, 10, 1]); - - let z = x.vec_gt(y).unwrap(); - assert_eq!(z, RepType::from(vec![Some(false), NA, Some(true)])); - - let expected_type = RepType::::new(); - assert!(z.is_same_type_as(&expected_type)); - assert!(z.is_logical()); - } - - #[test] - fn test_iter_values() { - // Create values as Vec - let values = vec![1, 2, 3, 4, 5]; - - // Create RepType from values - let rep = RepType::from(values.clone()); - - // Use iter_values to get an iterator and collect values - let collected_values: Vec = rep.iter_values().collect(); - - // Expected values as Vec> - let expected_values: Vec = values.into_iter().map(OptionNA::Some).collect(); - - // Assert collected values match expected values - assert_eq!(collected_values, expected_values); - } - - #[test] - fn test_iter_names() { - // Create values with names - let values_with_names = vec![ - (Character::Some(String::from("a")), 1), - (Character::Some(String::from("b")), 2), - (Character::NA, 3), - (Character::Some(String::from("d")), 4), - (Character::NA, 5), - ]; - - // Create RepType from values with names - let rep = RepType::from(values_with_names.clone()); - - // Use iter_names to get an iterator - let names_iter = rep.iter_names(); - - // Ensure iter_names is Some iterator - assert!(names_iter.is_some()); - - // Collect names - let collected_names: Vec = names_iter.unwrap().collect(); - - // Expected names - let expected_names: Vec = values_with_names - .iter() - .map(|(name_opt, _)| match name_opt { - Some(name) => Character::Some(name.clone()), - Character::NA => Character::NA, - }) - .collect(); - - // Assert collected names match expected names - assert_eq!(collected_names, expected_names); - } - - use crate::object::{Obj, Vector}; - // The tests below don't test the subsetting mechanism, which is instead tested in subsets.rs - #[test] - fn iter_pairs_mixed_names() { - let x = r!(c(a = 1, 2)).unwrap(); - - let mut 
x = if let Obj::Vector(Vector::Double(r)) = x { - r.iter_pairs() - } else { - unreachable!() - }; - - assert_eq!( - x.next().unwrap(), - (Character::Some("a".to_string()), Double::Some(1.0)) - ); - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_pairs_no_names() { - let x = r!(c(1, 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.iter_pairs() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(1.0))); - assert_eq!(x.next().unwrap(), (Character::NA, Double::Some(2.0))); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_values() { - let x = r!(c(1, 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.iter_values() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), Double::Some(1.0)); - assert_eq!(x.next().unwrap(), Double::Some(2.0)); - assert_eq!(x.next(), None); - } - - #[test] - fn iter_names_none() { - let x = r!(c(1, 2)).unwrap(); - - let x = if let Obj::Vector(Vector::Double(r)) = x { - r.iter_names() - } else { - unreachable!() - }; - - assert!(x.is_none()) - } - - #[test] - fn iter_names_some() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.iter_names().unwrap() - } else { - unreachable!() - }; - - assert_eq!(x.next().unwrap(), Character::NA); - assert_eq!(x.next().unwrap(), Character::Some("b".to_string())); - assert_eq!(x.next(), None); - } - - #[test] - fn names_ref_iter_some() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.names_ref().unwrap() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), &Character::NA); - assert_eq!(x.next().unwrap(), &Character::Some("b".to_string())); - assert_eq!(x.next(), None); - } - - #[test] - #[should_panic] - fn names_ref_iter_none() { - let x = r!(c(1, 2)).unwrap(); - - if let 
Obj::Vector(Vector::Double(r)) = x { - r.names_ref().unwrap() - } else { - unreachable!() - }; - } - - #[test] - fn values_ref_iter() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.values_ref() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), &Double::Some(1.0)); - assert_eq!(x.next().unwrap(), &Double::Some(2.0)); - assert_eq!(x.next(), None); - } - - #[test] - fn pairs_ref_iter() { - let x = r!(c(1, b = 2)).unwrap(); - - let mut x = if let Obj::Vector(Vector::Double(r)) = x { - r.pairs_ref() - } else { - unreachable!() - }; - - let mut x = x.iter(); - - assert_eq!(x.next().unwrap(), (&Character::NA, &Double::Some(1.0))); - assert_eq!( - x.next().unwrap(), - (&Character::Some("b".to_string()), &Double::Some(2.0)) - ); - assert_eq!(x.next(), None); - } - - use crate::error::Error; - use crate::lang::Signal; - - #[test] - fn assign_recycle_incompatible() { - let mut x = RepType::::from(vec![1, 2, 3]); - let y = RepType::::from(vec![99, 99]); - let result = x.assign(y); - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(3, 2)) - ); - } - #[test] - fn assign_recycle_length_one() { - let x = RepType::::from(vec![1, 2, 3]); - let y = RepType::::from(vec![99]); - let mut xview = x.subset(vec![0, 1].into()); - let _ = xview.assign(y).unwrap(); - let result_vec: Vec<_> = x.iter_values().collect(); - assert_eq!(result_vec, vec![Some(99), Some(99), Some(3)]) - } - #[test] - fn non_recyclable_lengths_3_2() { - let x = RepType::::from(vec![1, 2, 3]); - let y = RepType::::from(vec![99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(3, 2)) - ); - } - #[test] - fn non_recyclable_lengths_4_2() { - let x = RepType::::from(vec![1, 2, 3, 4]); - let y = RepType::::from(vec![99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(4, 2)) - ); 
- } - #[test] - fn non_recyclable_lengths_2_3() { - let x = RepType::::from(vec![1, 2]); - let y = RepType::::from(vec![99, 99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(2, 3)) - ); - } - #[test] - fn non_recyclable_lengths_2_4() { - let x = RepType::::from(vec![1, 2]); - let y = RepType::::from(vec![99, 99, 99, 99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(2, 4)) - ); - } - #[test] - fn non_recyclable_lengths_0_1() { - let x = RepType::::from(Vec::::new()); - let y = RepType::::from(vec![99]); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(0, 1)) - ); - } - #[test] - fn non_recyclable_lengths_1_0() { - let x = RepType::::from(vec![99]); - let y = RepType::::from(Vec::::new()); - let result = x + y; - assert_eq!( - result.unwrap_err(), - Signal::Error(Error::NonRecyclableLengths(1, 0)) - ); - } -} diff --git a/src/object/vector/subset.rs b/src/object/vector/subset.rs index 243ba73..2f4ba84 100644 --- a/src/object/vector/subset.rs +++ b/src/object/vector/subset.rs @@ -163,18 +163,13 @@ impl Subset { } } Subset::Mask(mask) => { - Box::new( - (**mask.borrow()) - .clone() - .into_iter() - .cycle() - .zip(iter) - .filter_map(|(mask, i @ (i_orig, _))| match mask { - OptionNA::Some(true) => Some(i), // accept index - OptionNA::NA => Some((i_orig, None)), // accept, but NA - _ => None, // filter falses - }), - ) + Box::new((**mask.borrow()).clone().into_iter().zip(iter).filter_map( + |(mask, i @ (i_orig, _))| match mask { + OptionNA::Some(true) => Some(i), // accept index + OptionNA::NA => Some((i_orig, None)), // accept, but NA + _ => None, // filter falses + }, + )) } Subset::Range(range) => Box::new( iter.skip(range.start) From 8a9be408f0d2c7843622984d80be8a1d1c76a647 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 08:35:14 +0200 Subject: [PATCH 5/8] ... 
--- src/object/vector/rep.rs | 82 +++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 9 deletions(-) diff --git a/src/object/vector/rep.rs b/src/object/vector/rep.rs index d9753cb..d6ce7ca 100644 --- a/src/object/vector/rep.rs +++ b/src/object/vector/rep.rs @@ -27,7 +27,7 @@ impl Naming { Naming::default() } - /// Create a naming with the given `capacity`. + /// See [Self::is_double] for more information pub fn with_capacity(capacity: usize) -> Self { Self { map: HashMap::>::with_capacity(capacity).into(), @@ -362,14 +362,8 @@ impl Rep { } } - // fn materialize_inplace(&self) -> &Self { - // // TODO: Rewrite this to avoid copying unnecessarily - // let new_repr = { self.borrow().materialize() }; - // self.0.replace(new_repr); - - // self - // } - + /// Change a value at the location given by `subset` to the provided `value`. + /// If the `subset` does not have length `1`, an error is returned. pub fn set_subset(&mut self, subset: Subset, value: T) -> Result { match &self { Rep::Subset(..) => { @@ -396,6 +390,11 @@ impl Rep { } } + /// Get an `IntoIterableRefValues` which in turn can be converted into an iterator over + /// references to the values. + /// Unlike [Self::names_ref], the result is not wrapped in an `Option`. + /// + /// Directly getting an iterator is not possible due to lifetime issues. pub fn values_ref(&self) -> IntoIterableRefValues { match self.clone() { Rep::Subset(values, ..) => { @@ -407,6 +406,10 @@ impl Rep { } } + /// Get an `Option` of an iterable which in turn can be converted into an iterator over + /// references to the names. The `None` variant is returned if the `Rep` is not named. + /// + /// Directly getting an iterator is not possible due to lifetime issues. pub fn names_ref(&self) -> Option { match self.clone() { Rep::Subset(.., naming) => { @@ -419,6 +422,10 @@ impl Rep { } } + /// Get an `IntoIterableRefPairs` which in turn can be converted into an iterator over + /// pairs of references (&name, &value). 
+ /// + /// Directly getting an iterator is not possible due to lifetime issues. pub fn pairs_ref(&self) -> IntoIterableRefPairs { match self.clone() { Rep::Subset(values, _, maybe_naming) => { @@ -449,6 +456,7 @@ impl Rep { } } + /// Iterate over the (owned) values of the vector. pub fn iter_values(&self) -> IterableValues { match self.clone() { Rep::Subset(values, ..) => { @@ -458,6 +466,7 @@ impl Rep { } } + /// Iterate over the names of the vector (if they exist). pub fn iter_names(&self) -> Option> { match self.clone() { Rep::Subset(.., maybe_naming) => { @@ -473,6 +482,7 @@ impl Rep { self.push_named(Character::NA, value); } + /// Push a named `value` with a given `name` onto the `Rep`. pub fn push_named(&self, name: OptionNA, value: T) { match self { Rep::Subset(values, Subsets(subsets), maybe_naming) => match subsets.as_slice() { @@ -529,6 +539,21 @@ impl Rep { } } + /// Constructs a new, empty `Rep` with at least the specified `capacity`. + /// Names are only included if `names` is true. + pub fn with_capacity(capacity: usize, names: bool) -> Self { + let naming = if names { + Some(Naming::with_capacity(capacity)) + } else { + None + }; + Rep::Subset( + CowObj::from(Vec::with_capacity(capacity)), + Subsets::default(), + naming, + ) + } + pub fn dedup_last(self) -> Self { match self { Rep::Subset(values, subsets, Some(naming)) => { @@ -597,6 +622,7 @@ impl Rep { } } + /// The length of the vector. pub fn len(&self) -> usize { match self { Rep::Subset(v, Subsets(s), _) => match s.as_slice() { @@ -605,6 +631,8 @@ impl Rep { }, } } + + /// Whether the vector has length 0. #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 @@ -756,6 +784,11 @@ impl Rep { } } + /// Test the mode of the internal vector type + /// + /// Internally, this is defined by the [crate::object::coercion::AtomicMode] + /// implementation of the vector's element type. 
+ /// pub fn is_double(&self) -> bool where T: AtomicMode, @@ -763,6 +796,7 @@ impl Rep { T::is_double() } + /// See [Self::is_double] for more information pub fn is_logical(&self) -> bool where T: AtomicMode, @@ -770,6 +804,7 @@ impl Rep { T::is_logical() } + /// See [Self::is_double] for more information pub fn is_integer(&self) -> bool where T: AtomicMode, @@ -777,6 +812,7 @@ impl Rep { T::is_integer() } + /// See [Self::is_double] for more information pub fn is_character(&self) -> bool where T: AtomicMode, @@ -784,6 +820,30 @@ impl Rep { T::is_character() } + /// Convert a Vector into a vector of a specific class of internal type + /// + /// The internal type only needs to satisfy + /// [crate::object::coercion::CoercibleInto] for the `Mode`, and for the `Mode` + /// type to implement [crate::object::coercion::AtomicMode]. Generally, + /// this is used more directly via [Self::as_logical], [Self::as_integer], + /// [Self::as_double] and [Self::as_character], which predefine the output + /// type of the mode. 
+ /// + /// ``` + /// use r::object::Vector; + /// use r::object::OptionNA; + /// + /// let x = Vector::from(vec![false, true, true, false]); + /// let n = x.as_double(); + /// + /// assert_eq!(n, Vector::from(vec![ + /// OptionNA::Some(0_f64), + /// OptionNA::Some(1_f64), + /// OptionNA::Some(1_f64), + /// OptionNA::Some(0_f64) + /// ])) + /// ``` + /// pub fn as_mode(&self) -> Rep where T: CoercibleInto, @@ -801,6 +861,7 @@ impl Rep { } } + /// See [Self::as_mode] for more information pub fn as_logical(&self) -> Rep where T: CoercibleInto, @@ -808,6 +869,7 @@ impl Rep { self.as_mode::() } + /// See [Self::as_mode] for more information pub fn as_integer(&self) -> Rep where T: CoercibleInto, @@ -815,6 +877,7 @@ impl Rep { self.as_mode::() } + /// See [Self::as_mode] for more information pub fn as_double(&self) -> Rep where T: CoercibleInto, @@ -822,6 +885,7 @@ impl Rep { self.as_mode::() } + /// See [Self::as_mode] for more information pub fn as_character(&self) -> Rep where T: CoercibleInto, From c13af5ede81c1d3e417a744b5eea65a8e296f43d Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 08:46:34 +0200 Subject: [PATCH 6/8] ... 
--- src/callable/core.rs | 2 +- src/object/vector/core.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/callable/core.rs b/src/callable/core.rs index 457c34f..43334a4 100644 --- a/src/callable/core.rs +++ b/src/callable/core.rs @@ -47,7 +47,7 @@ pub trait Callable: CallableFormals { for (i, (maybe_name, value)) in args.pairs_ref().iter().enumerate() { if let Character::Some(name) = maybe_name { - if let Some((Some(_), _)) = formals.remove_named(&name) { + if let Some((Some(_), _)) = formals.remove_named(name) { matched_args.push_named(Character::Some(name.clone()), value.clone()); continue; } diff --git a/src/object/vector/core.rs b/src/object/vector/core.rs index b819163..850eb84 100644 --- a/src/object/vector/core.rs +++ b/src/object/vector/core.rs @@ -325,25 +325,25 @@ impl From>> for Vector { impl From> for Vector { fn from(x: Rep) -> Self { - Vector::Double(x.into()) + Vector::Double(x) } } impl From> for Vector { fn from(x: Rep) -> Self { - Vector::Integer(x.into()) + Vector::Integer(x) } } impl From> for Vector { fn from(x: Rep) -> Self { - Vector::Logical(x.into()) + Vector::Logical(x) } } impl From> for Vector { fn from(x: Rep) -> Self { - Vector::Character(x.into()) + Vector::Character(x) } } From 710301f918ba520dc7c7242c615224f42ffa98bb Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 08:47:18 +0200 Subject: [PATCH 7/8] ... --- src/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CHANGELOG.md b/src/CHANGELOG.md index f7e1151..a61937a 100644 --- a/src/CHANGELOG.md +++ b/src/CHANGELOG.md @@ -20,7 +20,7 @@ This included a considerable refactor. * Iterating over references of a `Rep` was made much simpler and new methods were added and unused ones removed. -* The `RepType` struct that was introduced in 0.4.0 was removed again. +* The `RepType` struct that was introduced in 0.4.0 was removed again (#189). 
## Notable Bugs Addressed From d5ecad3418cef93c252ec0b4a94a2f0314ff55b0 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Thu, 17 Oct 2024 08:51:06 +0200 Subject: [PATCH 8/8] ... --- src/object/vector/rep.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object/vector/rep.rs b/src/object/vector/rep.rs index d6ce7ca..fc48e17 100644 --- a/src/object/vector/rep.rs +++ b/src/object/vector/rep.rs @@ -27,7 +27,7 @@ impl Naming { Naming::default() } - /// See [Self::is_double] for more information + /// Allocates a new Naming with a capacity for `capacity` elements. pub fn with_capacity(capacity: usize) -> Self { Self { map: HashMap::>::with_capacity(capacity).into(),