diff --git a/UQPyL/surrogates/mars/core/_basis.pxd b/UQPyL/surrogates/mars/core/_basis.pxd new file mode 100644 index 00000000..b131f333 --- /dev/null +++ b/UQPyL/surrogates/mars/core/_basis.pxd @@ -0,0 +1,239 @@ +from cpython cimport bool +cimport numpy as cnp +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t + +cdef class BasisFunction: + '''Abstract. Subclasses must implement the apply and __init__ methods.''' + + cdef BasisFunction parent + cdef dict child_map + cdef list children + cdef bint pruned + cdef bint prunable + cdef bint splittable + + cpdef smooth(BasisFunction self, dict knot_dict, dict translation) + + cpdef bint has_knot(BasisFunction self) + + cpdef bint is_prunable(BasisFunction self) + + cpdef bint is_pruned(BasisFunction self) + + cpdef bint is_splittable(BasisFunction self) + + cpdef bint make_splittable(BasisFunction self) + + cpdef bint make_unsplittable(BasisFunction self) + + cpdef list get_children(BasisFunction self) + + cpdef BasisFunction get_coverage(BasisFunction self, INDEX_t variable) + + cpdef bool has_linear(BasisFunction self, INDEX_t variable) + + cpdef bool linear_in(BasisFunction self, INDEX_t variable) + + cpdef _set_parent(BasisFunction self, BasisFunction parent) + + cpdef _add_child(BasisFunction self, BasisFunction child) + + cpdef BasisFunction get_parent(BasisFunction self) + + cpdef prune(BasisFunction self) + + cpdef unprune(BasisFunction self) + + cpdef knots(BasisFunction self, INDEX_t variable) + + cpdef INDEX_t effective_degree(BasisFunction self) + + cpdef apply(BasisFunction self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, bint recurse= ?) + + cpdef cnp.ndarray[INT_t, ndim = 1] valid_knots(BasisFunction self, + cnp.ndarray[FLOAT_t, ndim=1] values, + cnp.ndarray[FLOAT_t, ndim=1] variable, + int variable_idx, INDEX_t check_every, + int endspan, int minspan, + FLOAT_t minspan_alpha, INDEX_t n, + cnp.ndarray[INT_t, ndim=1] workspace) + +cdef class RootBasisFunction(BasisFunction): + cpdef bint covered(RootBasisFunction self, INDEX_t variable) + + cpdef bint eligible(RootBasisFunction self, INDEX_t variable) + + cpdef set variables(RootBasisFunction self) + + cpdef _smoothed_version(RootBasisFunction self, BasisFunction parent, + dict knot_dict, dict translation) + + cpdef INDEX_t degree(RootBasisFunction self) + + cpdef _effective_degree(RootBasisFunction self, dict data_dict, dict missing_dict) + + cpdef _set_parent(RootBasisFunction self, BasisFunction parent) + + cpdef BasisFunction get_parent(RootBasisFunction self) + + cpdef apply(RootBasisFunction self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, bint recurse=?) + + cpdef apply_deriv(RootBasisFunction self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, + cnp.ndarray[FLOAT_t, ndim=1] j, INDEX_t var) + +cdef class ConstantBasisFunction(RootBasisFunction): + + cpdef inline FLOAT_t eval(ConstantBasisFunction self) + + cpdef inline FLOAT_t eval_deriv(ConstantBasisFunction self) + +cdef class VariableBasisFunction(BasisFunction): + cdef INDEX_t variable + cdef readonly label + + cpdef INDEX_t degree(VariableBasisFunction self) + + cpdef set variables(VariableBasisFunction self) + + cpdef INDEX_t get_variable(VariableBasisFunction self) + +cdef class DataVariableBasisFunction(VariableBasisFunction): + cpdef _effective_degree(DataVariableBasisFunction self, dict data_dict, dict missing_dict) + + cpdef bint covered(DataVariableBasisFunction self, INDEX_t variable) + + cpdef bint eligible(DataVariableBasisFunction self, INDEX_t variable) + + cpdef apply(DataVariableBasisFunction self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, bint recurse=?) + + cpdef apply_deriv(DataVariableBasisFunction self, + cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, + cnp.ndarray[FLOAT_t, ndim=1] j, INDEX_t var) + +cdef class MissingnessBasisFunction(VariableBasisFunction): + cdef readonly bint complement + + cpdef _effective_degree(MissingnessBasisFunction self, dict data_dict, dict missing_dict) + + cpdef bint covered(MissingnessBasisFunction self, INDEX_t variable) + + cpdef bint eligible(MissingnessBasisFunction self, INDEX_t variable) + + cpdef bint covered(MissingnessBasisFunction self, INDEX_t variable) + + cpdef bint eligible(MissingnessBasisFunction self, INDEX_t variable) + + cpdef apply(MissingnessBasisFunction self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, bint recurse=?) + + cpdef apply_deriv(MissingnessBasisFunction self, + cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, + cnp.ndarray[FLOAT_t, ndim=1] j, INDEX_t var) + + cpdef _smoothed_version(MissingnessBasisFunction self, BasisFunction parent, + dict knot_dict, dict translation) + +cdef class HingeBasisFunctionBase(DataVariableBasisFunction): + cdef FLOAT_t knot + cdef INDEX_t knot_idx + cdef bint reverse + + cpdef bint has_knot(HingeBasisFunctionBase self) + + cpdef INDEX_t get_variable(HingeBasisFunctionBase self) + + cpdef FLOAT_t get_knot(HingeBasisFunctionBase self) + + cpdef bint get_reverse(HingeBasisFunctionBase self) + + cpdef INDEX_t get_knot_idx(HingeBasisFunctionBase self) + +cdef class SmoothedHingeBasisFunction(HingeBasisFunctionBase): + cdef FLOAT_t p + cdef FLOAT_t r + cdef FLOAT_t knot_minus + cdef FLOAT_t knot_plus + + cpdef _smoothed_version(SmoothedHingeBasisFunction self, + BasisFunction parent, dict knot_dict, + dict translation) + + cpdef get_knot_minus(SmoothedHingeBasisFunction self) + + cpdef get_knot_plus(SmoothedHingeBasisFunction self) + + cpdef _init_p_r(SmoothedHingeBasisFunction self) + + cpdef get_p(SmoothedHingeBasisFunction self) + + cpdef get_r(SmoothedHingeBasisFunction self) + +cdef class HingeBasisFunction(HingeBasisFunctionBase): + + cpdef _smoothed_version(HingeBasisFunction self, + BasisFunction parent, + dict knot_dict, dict translation) + +cdef class LinearBasisFunction(DataVariableBasisFunction): + cpdef bool linear_in(LinearBasisFunction self, INDEX_t variable) + + cpdef _smoothed_version(LinearBasisFunction self, BasisFunction parent, + dict knot_dict, dict translation) + +cdef class Basis: + '''A wrapper that provides functionality related to a set of BasisFunctions + with a common RootBasisFunction ancestor. Retains the order in which + BasisFunctions are added.''' + + cdef list order + cdef readonly INDEX_t num_variables +# cdef dict coverage + +# cpdef add_coverage(Basis self, int variable, MissingnessBasisFunction b1, \ +# MissingnessBasisFunction b2) +# +# cpdef get_coverage(Basis self, int variable) +# +# cpdef bint has_coverage(Basis self, int variable) + + cpdef int get_num_variables(Basis self) + + cpdef dict anova_decomp(Basis self) + + cpdef smooth(Basis self, cnp.ndarray[FLOAT_t, ndim=2] X) + + cpdef append(Basis self, BasisFunction basis_function) + + cpdef INDEX_t plen(Basis self) + + cpdef BasisFunction get(Basis self, INDEX_t i) + + cpdef transform(Basis self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=2] B) + + cpdef weighted_transform(Basis self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=2] B, + cnp.ndarray[FLOAT_t, ndim=1] weights) + + cpdef transform_deriv(Basis self, cnp.ndarray[FLOAT_t, ndim=2] X, + cnp.ndarray[BOOL_t, ndim=2] missing, + cnp.ndarray[FLOAT_t, ndim=1] b, + cnp.ndarray[FLOAT_t, ndim=1] j, + cnp.ndarray[FLOAT_t, ndim=2] coef, + cnp.ndarray[FLOAT_t, ndim=3] J, + list variables_of_interest, bool prezeroed_j=?) diff --git a/UQPyL/surrogates/mars/core/_forward.pxd b/UQPyL/surrogates/mars/core/_forward.pxd new file mode 100644 index 00000000..edb7f07b --- /dev/null +++ b/UQPyL/surrogates/mars/core/_forward.pxd @@ -0,0 +1,93 @@ +cimport numpy as cnp +import numpy as np +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t +from ._basis cimport Basis +from ._record cimport ForwardPassRecord +from ._knot_search cimport MultipleOutcomeDependentData + +# cdef dict stopping_conditions + +cdef class ForwardPasser: + + # User selected parameters + cdef int endspan + cdef int minspan + cdef FLOAT_t endspan_alpha + cdef FLOAT_t minspan_alpha + cdef int max_terms + cdef bint allow_linear + cdef int max_degree + cdef FLOAT_t thresh + cdef FLOAT_t penalty + cdef int check_every + cdef int min_search_points + cdef list xlabels + cdef FLOAT_t zero_tol + cdef list fast_heap + cdef int use_fast + cdef long fast_K + cdef long fast_h + cdef bint allow_missing + cdef int verbose + + # Input data + cdef cnp.ndarray X + cdef cnp.ndarray missing + cdef cnp.ndarray y + cdef cnp.ndarray y_col_sum + cdef cnp.ndarray y_row_sum + cdef cnp.ndarray sample_weight + cdef cnp.ndarray output_weight + cdef INDEX_t m + cdef INDEX_t n + cdef FLOAT_t sst + cdef FLOAT_t y_squared + cdef FLOAT_t total_weight + + # Knot search data + cdef MultipleOutcomeDependentData outcome + cdef list predictors + cdef list workings + cdef INDEX_t n_outcomes + + # Working floating point data + cdef cnp.ndarray B # Data matrix in basis space + cdef cnp.ndarray B_orth # Orthogonalized version of B + cdef cnp.ndarray c + cdef cnp.ndarray c_sqr + cdef cnp.ndarray norms + cdef cnp.ndarray u + cdef cnp.ndarray B_orth_times_parent_cum + cdef FLOAT_t c_squared + + # Working integer data + cdef cnp.ndarray sort_tracker + cdef cnp.ndarray sorting + cdef cnp.ndarray mwork + cdef cnp.ndarray linear_variables + cdef int iteration_number + cdef cnp.ndarray has_missing + + # Object construction + cdef ForwardPassRecord record + cdef Basis basis + + cpdef Basis get_basis(ForwardPasser self) + + cpdef init_linear_variables(ForwardPasser self) + + cpdef run(ForwardPasser self) + + cdef stop_check(ForwardPasser self) + + cpdef orthonormal_update(ForwardPasser self, b) + + cpdef orthonormal_downdate(ForwardPasser self) + + cdef next_pair(ForwardPasser self) + +# cdef best_knot(ForwardPasser self, INDEX_t parent, cnp.ndarray[FLOAT_t, ndim=1] x, +# INDEX_t k, cnp.ndarray[INT_t, ndim=1] candidates, +# cnp.ndarray[INT_t, ndim=1] order, +# FLOAT_t * mse, FLOAT_t * knot, +# INDEX_t * knot_idx) diff --git a/UQPyL/surrogates/mars/core/_knot_search.pxd b/UQPyL/surrogates/mars/core/_knot_search.pxd new file mode 100644 index 00000000..4c4bac24 --- /dev/null +++ b/UQPyL/surrogates/mars/core/_knot_search.pxd @@ -0,0 +1,94 @@ +cimport cython +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t +from ._basis cimport BasisFunction +from ._qr cimport UpdatingQT + +@cython.final +cdef class SingleWeightDependentData: + cdef readonly UpdatingQT updating_qt + cdef readonly FLOAT_t[:] w + cdef readonly INDEX_t m + cdef readonly INDEX_t k + cdef readonly INDEX_t max_terms + cdef readonly FLOAT_t[:, :] Q_t + cdef readonly FLOAT_t total_weight +# cpdef int update_from_basis_function(SingleWeightDependentData self, BasisFunction bf, FLOAT_t[:,:] X, +# BOOL_t[:,:] missing) except * + cpdef int update_from_array(SingleWeightDependentData self, FLOAT_t[:] b) except * +# cpdef int _update(SingleWeightDependentData self, FLOAT_t zero_tol) + cpdef downdate(SingleWeightDependentData self) + cpdef reweight(SingleWeightDependentData self, FLOAT_t[:] w, FLOAT_t[:,:] B, INDEX_t k) + +@cython.final +cdef class MultipleOutcomeDependentData: + cdef list outcomes + cdef list weights + cpdef update_from_array(MultipleOutcomeDependentData self, FLOAT_t[:] b) + cpdef downdate(MultipleOutcomeDependentData self) + cpdef list sse(MultipleOutcomeDependentData self) + cpdef FLOAT_t mse(MultipleOutcomeDependentData self) + +@cython.final +cdef class SingleOutcomeDependentData: + cdef readonly FLOAT_t[:] y + cdef readonly SingleWeightDependentData weight + cdef readonly FLOAT_t[:] theta + cdef public FLOAT_t omega + cdef public FLOAT_t sse_ + cdef public INDEX_t m + cdef public INDEX_t k + cdef public INDEX_t max_terms + cdef public object householder + cpdef FLOAT_t sse(SingleOutcomeDependentData self) + cpdef int synchronize(SingleOutcomeDependentData self) except * + cpdef int update(SingleOutcomeDependentData self) except * + cpdef downdate(SingleOutcomeDependentData self) + + +@cython.final +cdef class PredictorDependentData: + cdef readonly FLOAT_t[:] p + cdef readonly FLOAT_t[:] x + cdef readonly FLOAT_t[:] candidates + cdef readonly INDEX_t[:] order + +@cython.final +cdef class KnotSearchReadOnlyData: + cdef readonly PredictorDependentData predictor + cdef readonly MultipleOutcomeDependentData outcome + +@cython.final +cdef class KnotSearchState: + cdef public FLOAT_t alpha + cdef public FLOAT_t beta + cdef public FLOAT_t lambda_ + cdef public FLOAT_t mu + cdef public FLOAT_t upsilon + cdef public FLOAT_t phi + cdef public FLOAT_t phi_next + cdef public INDEX_t ord_idx + cdef public INDEX_t idx + cdef public FLOAT_t zeta_squared + +@cython.final +cdef class KnotSearchWorkingData: + cdef readonly FLOAT_t[:] gamma + cdef readonly FLOAT_t[:] kappa + cdef readonly FLOAT_t[:] delta_kappa + cdef readonly FLOAT_t[:] chi + cdef readonly FLOAT_t[:] psi + cdef KnotSearchState state + +@cython.final +cdef class KnotSearchData: + cdef readonly KnotSearchReadOnlyData constant + cdef readonly list workings + cdef public INDEX_t q + +cdef dot(FLOAT_t[:] x1, FLOAT_t[:] x2, INDEX_t q) +cdef w2dot(FLOAT_t[:] w, FLOAT_t[:] x1, FLOAT_t[:] x2, INDEX_t q) +cdef wdot(FLOAT_t[:] w, FLOAT_t[:] x1, FLOAT_t[:] x2, INDEX_t q) +cdef inline void fast_update(PredictorDependentData predictor, SingleOutcomeDependentData outcome, + KnotSearchWorkingData working, FLOAT_t[:] p, INDEX_t q, INDEX_t m ,INDEX_t r) except * +cpdef tuple knot_search(KnotSearchData data, FLOAT_t[:] candidates, FLOAT_t[:] p, INDEX_t q, INDEX_t m, INDEX_t r, INDEX_t n_outcomes, int verbose) + diff --git a/UQPyL/surrogates/mars/core/_pruning.pxd b/UQPyL/surrogates/mars/core/_pruning.pxd new file mode 100644 index 00000000..ccdbaf75 --- /dev/null +++ b/UQPyL/surrogates/mars/core/_pruning.pxd @@ -0,0 +1,25 @@ +cimport numpy as cnp +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t +from ._basis cimport Basis +from ._record cimport PruningPassRecord + +cdef class PruningPasser: + cdef cnp.ndarray X + cdef cnp.ndarray missing + cdef cnp.ndarray B + cdef cnp.ndarray y + cdef cnp.ndarray sample_weight + cdef int verbose + cdef cnp.ndarray output_weight + cdef public dict feature_importance + + cdef INDEX_t m + cdef INDEX_t n + cdef Basis basis + cdef FLOAT_t penalty + cdef FLOAT_t sst + cdef PruningPassRecord record + + cpdef run(PruningPasser self) + + cpdef PruningPassRecord trace(PruningPasser self) diff --git a/UQPyL/surrogates/mars/core/_qr.pxd b/UQPyL/surrogates/mars/core/_qr.pxd new file mode 100644 index 00000000..9aa805fc --- /dev/null +++ b/UQPyL/surrogates/mars/core/_qr.pxd @@ -0,0 +1,34 @@ +from cython cimport view +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t + +cdef class UpdatingQT: + cdef readonly int m + cdef readonly int max_n + cdef readonly Householder householder + cdef readonly int k + cdef readonly FLOAT_t[::1, :] Q_t + cdef readonly FLOAT_t zero_tol + cdef readonly BOOL_t[::1] dependent_cols + cpdef void update_qt(UpdatingQT self, bint dependent) + cpdef void update(UpdatingQT self, FLOAT_t[:] x) + cpdef void downdate(UpdatingQT self) + cpdef void reset(UpdatingQT self) + +cdef class Householder: + cdef readonly int k + cdef readonly int m + cdef readonly int max_n + cdef readonly FLOAT_t[::1, :] V + cdef readonly FLOAT_t[::1, :] T + cdef readonly FLOAT_t[::1] tau + cdef readonly FLOAT_t[::1] beta + cdef readonly FLOAT_t[::1, :] work + cdef readonly FLOAT_t zero_tol + cpdef void downdate(Householder self) + cpdef void reset(Householder self) + cpdef bint update_from_column(Householder self, FLOAT_t[:] c) + cpdef bint update_v_t(Householder self) + cpdef void left_apply(Householder self, FLOAT_t[::1, :] C) + cpdef void left_apply_transpose(Householder self, FLOAT_t[::1, :] C) + cpdef void right_apply(Householder self, FLOAT_t[::1, :] C) + cpdef void right_apply_transpose(Householder self, FLOAT_t[::1, :] C) diff --git a/UQPyL/surrogates/mars/core/_record.pxd b/UQPyL/surrogates/mars/core/_record.pxd new file mode 100644 index 00000000..7b52f949 --- /dev/null +++ b/UQPyL/surrogates/mars/core/_record.pxd @@ -0,0 +1,66 @@ +cimport numpy as cnp +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t +from ._basis cimport Basis + +cdef class Record: + cdef list iterations + cdef int num_samples + cdef int num_variables + cdef FLOAT_t penalty + cdef FLOAT_t sst # Sum of squares total + + cpdef append(Record self, Iteration iteration) + + cpdef FLOAT_t mse(Record self, INDEX_t iteration) + + cpdef FLOAT_t rsq(Record self, INDEX_t iteration) + + cpdef FLOAT_t gcv(Record self, INDEX_t iteration) + + cpdef FLOAT_t grsq(Record self, INDEX_t iteration) + +cdef class PruningPassRecord(Record): + cdef readonly INDEX_t selected + + cpdef set_selected(PruningPassRecord self, INDEX_t selected) + + cpdef INDEX_t get_selected(PruningPassRecord self) + + cpdef roll_back(PruningPassRecord self, Basis basis) + +cdef class ForwardPassRecord(Record): + cdef readonly int stopping_condition + + cdef list xlabels + + cpdef set_stopping_condition(ForwardPassRecord self, int stopping_condition) + +cdef class Iteration: + cdef FLOAT_t mse + cdef INDEX_t size + + cpdef FLOAT_t get_mse(Iteration self) + + cpdef INDEX_t get_size(Iteration self) + +cdef class PruningPassIteration(Iteration): + cdef INDEX_t pruned + + cpdef INDEX_t get_pruned(PruningPassIteration self) + +cdef class FirstPruningPassIteration(PruningPassIteration): + pass + +cdef class ForwardPassIteration(Iteration): + cdef INDEX_t parent + cdef INDEX_t variable + cdef FLOAT_t knot + cdef int code + cdef bint no_candidates + + cpdef set_no_candidates(ForwardPassIteration self, bint value) + + cpdef no_further_candidates(ForwardPassIteration self) + +cdef class FirstForwardPassIteration(ForwardPassIteration): + cpdef INDEX_t get_size(FirstForwardPassIteration self) diff --git a/UQPyL/surrogates/mars/core/_util.pxd b/UQPyL/surrogates/mars/core/_util.pxd new file mode 100644 index 00000000..9e174fbe --- /dev/null +++ b/UQPyL/surrogates/mars/core/_util.pxd @@ -0,0 +1,23 @@ +cimport numpy as cnp +from ._types cimport FLOAT_t, INT_t, INDEX_t, BOOL_t + +cdef FLOAT_t log2(FLOAT_t x) + +cpdef apply_weights_2d(cnp.ndarray[FLOAT_t, ndim=2] B, + cnp.ndarray[FLOAT_t, ndim=1] weights) + +cpdef apply_weights_slice(cnp.ndarray[FLOAT_t, ndim=2] B, + cnp.ndarray[FLOAT_t, ndim=1] weights, INDEX_t column) + +cpdef apply_weights_1d(cnp.ndarray[FLOAT_t, ndim=1] y, + cnp.ndarray[FLOAT_t, ndim=1] weights) + +cpdef FLOAT_t gcv(FLOAT_t mse, + FLOAT_t basis_size, FLOAT_t data_size, + FLOAT_t penalty) + +cpdef FLOAT_t gcv_adjust(FLOAT_t basis_size, FLOAT_t data_size, FLOAT_t penalty) + +cpdef str_pad(string, length) + +cpdef ascii_table(header, data, print_header=?, print_footer=?)