refactor diffeq to pylint #694

Merged
merged 16 commits on Apr 5, 2022
78 changes: 47 additions & 31 deletions src/probnum/diffeq/odefilter/_odefilter.py
@@ -16,9 +16,8 @@


class ODEFilter(_odesolver.ODESolver):
"""Probabilistic ODE solver based on Gaussian filtering and smoothing.

This is based on continuous-discrete Gaussian filtering.
"""Probabilistic ODE solver based on Gaussian filtering and smoothing. This is based
on continuous-discrete Gaussian filtering.

Note: this is specific to IVPs and does not apply without
further considerations to, e.g., BVPs.
@@ -32,19 +31,22 @@ class ODEFilter(_odesolver.ODESolver):
information_operator
Information operator.
approx_strategy
Approximation strategy to turn an intractable information operator into a
tractable one.
with_smoothing
To smooth after the solve or not to smooth after the solve.
init_routine
Initialization algorithm.
Either via fitting the prior to a few steps of a Runge-Kutta method
(:class:`RungeKuttaInitialization`) or via Taylor-mode automatic differentiation
(:class:`TaylorModeInitialization`) [1]_.
diffusion_model :
Diffusion model. This determines which kind of calibration is used. We refer to
Bosch et al. (2020) [2]_ for a survey.
_reference_coordinates :
Use this state as a reference state to compute the normalized error estimate.
Optional. Default is 0 (which amounts to the usual reference state for ODE
solvers). Another reasonable choice could be 1, but use this at your own risk!

References
----------
@@ -54,6 +56,7 @@ class ODEFilter(_odesolver.ODESolver):
.. [2] Bosch, N., Hennig, P., and Tronarp, F.
Calibrated adaptive probabilistic ODE solvers.
2021.

"""

def __init__(
@@ -145,50 +148,61 @@ def attempt_step(self, state, dt):

It goes as follows:

1. The current state :math:`x(t) \sim \mathcal{N}(m(t), P(t))` is split into a
deterministic component and a noisy component,

.. math::
x(t) = m(t) + p(t), \quad p(t) \sim \mathcal{N}(0, P(t))

which is required for accurate calibration and error estimation: in order to
only work with the local error, ODE solvers often assume that the error at the
previous step is zero.

2. The deterministic component is propagated through dynamics model and
measurement model

.. math::
\hat{z}(t+\Delta t)\sim\mathcal{N}(H\Phi(\Delta t)m(t),H Q(\Delta t) H^\top)

which is a random variable that estimates the expected local defect
:math:`\dot y - f(y)`.

3. The counterpart of :math:`\hat{z}` in the (much more likely) interpretation
of an erroneous previous state (recall that :math:`\hat z` was based on the
interpretation of an error-free previous state) is computed as,

.. math::
z(t + \Delta t) \sim \mathcal{N}(\mathbb{E}[\hat{z}(t + \Delta t)],
\mathbb{C}[\hat{z}(t+\Delta t)]+H\Phi(\Delta t)P(t)\Phi(\Delta t)^\top
H^\top),

which acknowledges the covariance :math:`P(t)` of the previous state.
:math:`\mathbb{E}` is the mean, and :math:`\mathbb{C}` is the covariance.
Both :math:`z(t + \Delta t)` and :math:`\hat z(t + \Delta t)` give rise to a
reasonable diffusion_model estimate. Which one to use is handled by the
``Diffusion`` attribute of the solver. Already at this point, we can compute a
local error estimate of the current step.

4. Depending on the diffusion model, there are two options now:

4.1. For a piecewise constant diffusion, the covariances are calibrated
locally -- that is, at each step. In this case we update the predicted
covariance and measured covariance with the most recent diffusion estimate.

4.2. For a constant diffusion, the calibration happens post hoc, and the
only step that is carried out here is an assembly of the full predicted
random variable (up to now, only its parts were available).

5. With the results of either 4.1. or 4.2. (which both return a predicted RV and
a measured RV), we finally compute the Kalman update and return the result.
Recall that the error estimate has been computed in the third step.

"""

# Read off system matrices; required for calibration / error estimation
# Use only where a full call to forward_*() would be too costly.
# We use the mathematical symbol `Phi` (and later, `H`), because this makes
# it easier to read for us.
discrete_dynamics = self.prior_process.transition.discretise(dt)

Phi = discrete_dynamics.transition_matrix
@@ -217,7 +231,8 @@ def attempt_step(self, state, dt):
# Compute the measurements for the full components.
# Since the means of noise-free and noisy measurements coincide,
# we manually update only the covariance.
# The first two are only matrix square-roots and will be turned into proper
# Cholesky factors below.
pred_sqrtm = Phi @ noisy_component.cov_cholesky
meas_sqrtm = H @ pred_sqrtm
full_meas_cov_cholesky = utils.linalg.cholesky_update(
@@ -247,7 +262,8 @@ def attempt_step(self, state, dt):

# Overwrite the acceptance/rejection step here, because we need control over
# appending or not appending the diffusion (and because the computations below
# are sufficiently costly such that skipping them here will have a positive
# impact).
internal_norm = self.steprule.errorest_to_norm(
errorest=local_errors,
reference_state=reference_values,
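
As a concrete illustration, the five numbered steps in the ``attempt_step`` docstring above condense into a small numerical sketch. Everything below (the IWP(1) matrices ``Phi`` and ``Q``, the EK0-style measurement matrix ``H``, the diffusion quasi-MLE, and the error proxy) is an illustrative stand-in for a scalar ODE, not ProbNum's internal API.

import numpy as np

def attempt_step_sketch(m, P, dt, f):
    """One EK0-style filter step for a scalar ODE y' = f(y), IWP(1) prior."""
    # Discretised prior: state (y, y'), transition Phi(dt), process noise Q(dt).
    Phi = np.array([[1.0, dt], [0.0, 1.0]])
    Q = np.array([[dt**3 / 3, dt**2 / 2], [dt**2 / 2, dt]])
    H = np.array([[0.0, 1.0]])  # EK0: the Jacobian of f is replaced by zero.

    # Steps 1 + 2: propagate the deterministic component (the mean) and
    # measure the defect z = y' - f(y) at the predicted mean.
    m_pred = Phi @ m
    z_mean = m_pred[1] - f(m_pred[0])
    zhat_cov = H @ Q @ H.T  # covariance of zhat (error-free previous state)

    # Step 3: acknowledge the previous covariance P(t) to obtain z, and use a
    # quasi-MLE of the local diffusion to calibrate the local error estimate.
    z_cov = zhat_cov + H @ Phi @ P @ Phi.T @ H.T
    diffusion = z_mean**2 / z_cov[0, 0]
    local_error = np.sqrt(diffusion * zhat_cov[0, 0])

    # Steps 4 + 5: assemble the predicted RV and condition on a zero defect.
    P_pred = Phi @ P @ Phi.T + Q
    S = H @ P_pred @ H.T  # innovation covariance
    K = P_pred @ H.T / S[0, 0]  # Kalman gain (scalar measurement)
    m_new = m_pred - K[:, 0] * z_mean
    P_new = P_pred - K @ S @ K.T
    return m_new, P_new, local_error

m0, P0 = np.array([1.0, -1.0]), 0.01 * np.eye(2)
print(attempt_step_sketch(m0, P0, dt=0.1, f=lambda y: -y))
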
18 changes: 11 additions & 7 deletions src/probnum/diffeq/odefilter/_odefilter_solution.py
@@ -12,7 +12,8 @@
class ODEFilterSolution(_odesolution.ODESolution):
"""Probabilistic ODE solution corresponding to the :class:`ODEFilter`.

Recall that in ProbNum, Gaussian filtering and smoothing is generally named
"Kalman".

Parameters
----------
@@ -118,8 +119,10 @@ def transform_base_measure_realizations(
t: ArrayLike = None,
) -> np.ndarray:
errormsg = (
"The ODEFilterSolution does not implement transformation of realizations of a base measure."
"Try `ODEFilterSolution.kalman_posterior.transform_base_measure_realizations` instead."
"The ODEFilterSolution does not implement transformation of realizations of"
" a base measure. Try "
"`ODEFilterSolution.kalman_posterior.transform_base_measure_realizations` "
"instead."
)

raise NotImplementedError(errormsg)
@@ -130,16 +133,17 @@ def filtering_solution(self):
if isinstance(self.kalman_posterior, filtsmooth.gaussian.FilteringPosterior):
return self

# else: self.kalman_posterior is a SmoothingPosterior object, which has the
# field filter_posterior.
return ODEFilterSolution(
kalman_posterior=self.kalman_posterior.filtering_posterior
)


def _project_rv(projmat, rv):
# There is no way of checking whether `rv` has its Cholesky factor computed already
# or not. Therefore, since we need to update the Cholesky factor for square-root
# filtering, we also update the Cholesky factor for non-square-root algorithms here,
# which implies additional cost.
# See Issues #319 and #329.
# When they are resolved, this function here will hopefully be superfluous.
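
For illustration, the Cholesky bookkeeping that ``_project_rv`` performs can be shown in isolation: after projecting a Gaussian, one standard way to re-establish a valid Cholesky factor is a QR decomposition of the projected square root. ``project_gaussian`` and its tuple-based interface below are hypothetical sketches, not ``probnum.randvars``.

import numpy as np

def project_gaussian(projmat, mean, cov_cholesky):
    """Project N(mean, L L^T) through projmat, keeping a valid Cholesky L."""
    new_mean = projmat @ mean
    # projmat @ L is a matrix square root but generally not triangular,
    # so recompute a proper Cholesky factor from its QR decomposition.
    sqrtm = projmat @ cov_cholesky
    new_cholesky = np.linalg.qr(sqrtm.T, mode="r").T
    # QR is unique only up to signs; flip columns so the diagonal is >= 0.
    signs = np.where(np.diag(new_cholesky) < 0.0, -1.0, 1.0)
    new_cholesky = new_cholesky * signs
    new_cov = new_cholesky @ new_cholesky.T
    return new_mean, new_cov, new_cholesky

# Example: project a 2D Gaussian onto its first coordinate.
mean = np.array([1.0, 2.0])
L = np.linalg.cholesky(np.array([[2.0, 0.5], [0.5, 1.0]]))
print(project_gaussian(np.array([[1.0, 0.0]]), mean, L))
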
@@ -17,11 +17,12 @@ class ApproximateInformationOperator(
An approximate information operator is a version of an information operator that
differs from its non-approximated counterpart in two ways:

1) When it is transformed into a transition, the output is an approximate transition
such as an EKF component.
2) The Jacobian might be different to the Jacobian of the original version.

Approximate information operators are returned by approximation strategies such as
EK0 and EK1. For instance, the EK0 changes the Jacobian of the information operator
(in the sense that it sets the Jacobian of the ODE vector field to zero).
"""

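
To make the EK0/EK1 distinction above concrete: for the ODE information operator :math:`\mathcal{Z}(Y) = Y_1 - f(Y_0)`, the two approximation strategies differ only in the Jacobian they hand to the linearisation. A schematic sketch follows; the helper names are illustrative, not ProbNum's.

import numpy as np

def ek1_jacobian(df, y0, d):
    # Exact linearisation of z(Y) = Y_1 - f(Y_0): dz/d(Y_0, Y_1) = [-df(y0), I].
    return np.hstack([-df(y0), np.eye(d)])

def ek0_jacobian(df, y0, d):
    # EK0: the Jacobian of the ODE vector field is set to zero: [0, I].
    return np.hstack([np.zeros((d, d)), np.eye(d)])

# For y' = -y in one dimension, df(y) = -I:
df = lambda y: -np.eye(1)
print(ek1_jacobian(df, np.array([1.0]), d=1))  # [[1. 1.]]
print(ek0_jacobian(df, np.array([1.0]), d=1))  # [[0. 1.]]
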
@@ -14,9 +14,9 @@
class InformationOperator(abc.ABC):
r"""Information operators used in probabilistic ODE solvers.

ODE solver-related information operators gather information about whether a state or
function solves an ODE. More specifically, an information operator maps a sample
from the prior distribution **that is also an ODE solution** to the zero function.

Consider the following example. For an ODE

@@ -30,10 +30,11 @@ class InformationOperator(abc.ABC):
(Recall that :math:`Y_j` models the :math:`j`-th derivative of :math:`Y_0` for a given prior.)
If :math:`Y_0` solves the ODE, :math:`\mathcal{Z}(Y)(t)` is zero for all :math:`t`.

Information operators are used to condition prior distributions on solving a
numerical problem. This happens by conditioning the prior distribution :math:`Y` on
:math:`\mathcal{Z}(Y)(t_n)=0` on time-points :math:`t_1, ..., t_n, ..., t_N`
(:math:`N` is usually large). Therefore, they are one important component in a
probabilistic ODE solver.
"""

def __init__(self, input_dim: IntLike, output_dim: IntLike):
@@ -70,8 +71,9 @@ def as_transition(
class ODEInformationOperator(InformationOperator):
"""Information operators that depend on an ODE function.

Unlike :class:`InformationOperator`, an :class:`ODEInformationOperator` depends
explicitly on an :class:`InitialValueProblem`. Not all information operators that
are used in ODE solvers do.
"""

def __init__(self, input_dim: IntLike, output_dim: IntLike):
@@ -84,8 +86,8 @@ def incorporate_ode(self, ode: problems.InitialValueProblem):
"""Incorporate the ODE into the operator."""
if self.ode_has_been_incorporated:
raise ValueError("An ODE has been incorporated already.")

self.ode = ode

@property
def ode_has_been_incorporated(self) -> bool:
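
A minimal executable version of the operator described above, for :math:`\dot y = f(y)` with a stacked state :math:`Y = (Y_0, Y_1)`. This is a sketch under those assumptions, not the module's actual class.

import numpy as np

def ode_information_operator(f):
    """Z(Y) = Y_1 - f(Y_0) for a stacked state Y = (Y_0, Y_1)."""
    def z(y_stacked):
        d = y_stacked.shape[0] // 2
        y0, y1 = y_stacked[:d], y_stacked[d:]
        return y1 - f(y0)
    return z

z = ode_information_operator(lambda y: -y)
print(z(np.array([2.0, -2.0])))  # [0.]: this Y satisfies the ODE constraint
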
20 changes: 12 additions & 8 deletions src/probnum/diffeq/odefilter/init_routines/__init__.py
@@ -11,22 +11,26 @@
* ``num_derivatives = 3,4,5``: :class:`NonProbabilisticFitWithJacobian`
if the Jacobian of the ODE vector field is available,
or :class:`NonProbabilisticFit` if not.
* ``num_derivatives > 5``: :class:`TaylorMode`. For orders 6 and 7,
:class:`ForwardModeJVP` might work well too. :class:`TaylorMode` shines for
``num_derivatives >> 5``.

Initialization routines for high-dimensional ODEs are not implemented efficiently yet.

It may also be worth noting:

* Only automatic-differentiation-based routines yield the exact initialization.
This becomes more desirable the larger the number of modelled derivatives is.
* :class:`ForwardModeJVP` is generally more efficient than :class:`ForwardMode`. The
jury is still out on the efficiency of :class:`ReverseMode`.
* :class:`Stack` and :class:`StackWithJacobian` are the only routines that come
essentially for free.
The other routines rely on either inference or automatic differentiation.
* For stiff ODEs, prefer :class:`NonProbabilisticFitWithJacobian` with ``BDF`` or
``Radau`` over :class:`NonProbabilisticFit` (or use one of the
automatic-differentiation-based routines).
* Initialization routines can be chained together. For example, build a
``prior_process`` with an ``initrv`` that is generated by :class:`StackWithJacobian`,
and initialize the ODE filter with :class:`NonProbabilisticFitWithJacobian`.
"""

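
The rules of thumb in this docstring amount to a small selector. Only the cases documented above are covered, and ``pick_init_routine`` is a hypothetical helper, not part of the module.

def pick_init_routine(num_derivatives: int, jacobian_available: bool) -> str:
    """Rule-of-thumb routine selection mirroring the docstring above."""
    if num_derivatives in (3, 4, 5):
        return (
            "NonProbabilisticFitWithJacobian"
            if jacobian_available
            else "NonProbabilisticFit"
        )
    if num_derivatives > 5:
        # ForwardModeJVP might work well for orders 6 and 7, too;
        # TaylorMode shines for num_derivatives >> 5.
        return "TaylorMode"
    raise ValueError("Smaller orders are covered earlier in the docstring.")
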
6 changes: 3 additions & 3 deletions src/probnum/diffeq/odefilter/utils/_problem_utils.py
@@ -11,9 +11,9 @@
__all__ = ["ivp_to_regression_problem"]

# The ODE information operator is not optional, because in order to create it
# one needs to know the order of the algorithm that is desired
# (i.e. num_prior_derivatives). Since this is a weird input for the function, it seems
# safer to just require the full operator.
def ivp_to_regression_problem(
ivp: problems.InitialValueProblem,
locations: Union[Sequence, np.ndarray],
@@ -27,7 +27,8 @@ def __init__(self, solver_type: rk.RungeKutta, steprule):
# Dopri853 as implemented in SciPy computes the dense output differently.
if issubclass(solver_type, rk.DOP853):
raise TypeError(
"Dense output interpolation of DOP853 is currently not supported. Choose a different RK-method."
"Dense output interpolation of DOP853 is currently not supported. "
"Choose a different RK-method."
)

super().__init__(steprule=steprule, order=solver_type.order)
19 changes: 10 additions & 9 deletions src/probnum/diffeq/perturbed/step/_perturbation_functions.py
@@ -34,17 +34,17 @@ def perturb_uniform(
References
----------
.. [1] Abdulle, A. and Garegnani, G.
Random time step probabilistic methods for uncertainty quantification in chaotic
and geometric numerical integration. Statistics and Computing. 2020.
"""
if step >= 1.0:
raise ValueError("Stepsize too large (>= 1)")

uniform_rv_samples = scipy.stats.uniform.rvs(random_state=rng, size=size)
shift = noise_scale * step ** (solver_order + 0.5)
left_boundary = step - shift
right_boundary = step + shift
samples = left_boundary + (right_boundary - left_boundary) * uniform_rv_samples
return samples
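
A usage sketch for ``perturb_uniform`` as shown above. The keyword names follow the variables in the function body; the full signature is truncated in this diff, so treat them as assumptions.

import numpy as np

rng = np.random.default_rng(seed=1)
samples = perturb_uniform(
    rng=rng, step=0.1, solver_order=4, noise_scale=1.0, size=(3,)
)
# Each sample lies in [step - shift, step + shift], where
# shift = noise_scale * step ** (solver_order + 0.5) (about 3.2e-5 here).
print(samples)
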


@@ -75,7 +75,8 @@ def perturb_lognormal(
References
----------
.. [1] Abdulle, A. and Garegnani, G.
Random time step probabilistic methods for uncertainty quantification in chaotic
and geometric numerical integration.
Statistics and Computing. 2020.
"""
shift = 0.5 * np.log(1 + noise_scale * (step ** (2 * solver_order)))