import operator import numpy as np from numpy.core.multiarray import normalize_axis_index from scipy.linalg import (get_lapack_funcs, LinAlgError, cholesky_banded, cho_solve_banded, solve, solve_banded) from scipy.optimize import minimize_scalar from . import _bspl from . import _fitpack_impl from scipy._lib._util import prod from scipy.sparse import csr_array from scipy.special import poch from itertools import combinations __all__ = ["BSpline", "make_interp_spline", "make_lsq_spline", "make_smoothing_spline"] def _get_dtype(dtype): """Return np.complex128 for complex dtypes, np.float64 otherwise.""" if np.issubdtype(dtype, np.complexfloating): return np.complex_ else: return np.float_ def _as_float_array(x, check_finite=False): """Convert the input into a C contiguous float array. NB: Upcasts half- and single-precision floats to double precision. """ x = np.ascontiguousarray(x) dtyp = _get_dtype(x.dtype) x = x.astype(dtyp, copy=False) if check_finite and not np.isfinite(x).all(): raise ValueError("Array must not contain infs or nans.") return x def _dual_poly(j, k, t, y): """ Dual polynomial of the B-spline B_{j,k,t} - polynomial which is associated with B_{j,k,t}: $p_{j,k}(y) = (y - t_{j+1})(y - t_{j+2})...(y - t_{j+k})$ """ if k == 0: return 1 return np.prod([(y - t[j + i]) for i in range(1, k + 1)]) def _diff_dual_poly(j, k, y, d, t): """ d-th derivative of the dual polynomial $p_{j,k}(y)$ """ if d == 0: return _dual_poly(j, k, t, y) if d == k: return poch(1, k) comb = list(combinations(range(j + 1, j + k + 1), d)) res = 0 for i in range(len(comb) * len(comb[0])): res += np.prod([(y - t[j + p]) for p in range(1, k + 1) if (j + p) not in comb[i//d]]) return res class BSpline: r"""Univariate spline in the B-spline basis. .. math:: S(x) = \sum_{j=0}^{n-1} c_j B_{j, k; t}(x) where :math:`B_{j, k; t}` are B-spline basis functions of degree `k` and knots `t`. Parameters ---------- t : ndarray, shape (n+k+1,) knots c : ndarray, shape (>=n, ...) spline coefficients k : int B-spline degree extrapolate : bool or 'periodic', optional whether to extrapolate beyond the base interval, ``t[k] .. t[n]``, or to return nans. If True, extrapolates the first and last polynomial pieces of b-spline functions active on the base interval. If 'periodic', periodic extrapolation is used. Default is True. axis : int, optional Interpolation axis. Default is zero. Attributes ---------- t : ndarray knot vector c : ndarray spline coefficients k : int spline degree extrapolate : bool If True, extrapolates the first and last polynomial pieces of b-spline functions active on the base interval. axis : int Interpolation axis. tck : tuple A read-only equivalent of ``(self.t, self.c, self.k)`` Methods ------- __call__ basis_element derivative antiderivative integrate construct_fast design_matrix from_power_basis Notes ----- B-spline basis elements are defined via .. math:: B_{i, 0}(x) = 1, \textrm{if $t_i \le x < t_{i+1}$, otherwise $0$,} B_{i, k}(x) = \frac{x - t_i}{t_{i+k} - t_i} B_{i, k-1}(x) + \frac{t_{i+k+1} - x}{t_{i+k+1} - t_{i+1}} B_{i+1, k-1}(x) **Implementation details** - At least ``k+1`` coefficients are required for a spline of degree `k`, so that ``n >= k+1``. Additional coefficients, ``c[j]`` with ``j > n``, are ignored. - B-spline basis elements of degree `k` form a partition of unity on the *base interval*, ``t[k] <= x <= t[n]``. Examples -------- Translating the recursive definition of B-splines into Python code, we have: >>> def B(x, k, i, t): ... if k == 0: ... return 1.0 if t[i] <= x < t[i+1] else 0.0 ... if t[i+k] == t[i]: ... c1 = 0.0 ... else: ... c1 = (x - t[i])/(t[i+k] - t[i]) * B(x, k-1, i, t) ... if t[i+k+1] == t[i+1]: ... c2 = 0.0 ... else: ... c2 = (t[i+k+1] - x)/(t[i+k+1] - t[i+1]) * B(x, k-1, i+1, t) ... return c1 + c2 >>> def bspline(x, t, c, k): ... n = len(t) - k - 1 ... assert (n >= k+1) and (len(c) >= n) ... return sum(c[i] * B(x, k, i, t) for i in range(n)) Note that this is an inefficient (if straightforward) way to evaluate B-splines --- this spline class does it in an equivalent, but much more efficient way. Here we construct a quadratic spline function on the base interval ``2 <= x <= 4`` and compare with the naive way of evaluating the spline: >>> from scipy.interpolate import BSpline >>> k = 2 >>> t = [0, 1, 2, 3, 4, 5, 6] >>> c = [-1, 2, 0, -1] >>> spl = BSpline(t, c, k) >>> spl(2.5) array(1.375) >>> bspline(2.5, t, c, k) 1.375 Note that outside of the base interval results differ. This is because `BSpline` extrapolates the first and last polynomial pieces of B-spline functions active on the base interval. >>> import matplotlib.pyplot as plt >>> import numpy as np >>> fig, ax = plt.subplots() >>> xx = np.linspace(1.5, 4.5, 50) >>> ax.plot(xx, [bspline(x, t, c ,k) for x in xx], 'r-', lw=3, label='naive') >>> ax.plot(xx, spl(xx), 'b-', lw=4, alpha=0.7, label='BSpline') >>> ax.grid(True) >>> ax.legend(loc='best') >>> plt.show() References ---------- .. [1] Tom Lyche and Knut Morken, Spline methods, http://www.uio.no/studier/emner/matnat/ifi/INF-MAT5340/v05/undervisningsmateriale/ .. [2] Carl de Boor, A practical guide to splines, Springer, 2001. """ def __init__(self, t, c, k, extrapolate=True, axis=0): super().__init__() self.k = operator.index(k) self.c = np.asarray(c) self.t = np.ascontiguousarray(t, dtype=np.float64) if extrapolate == 'periodic': self.extrapolate = extrapolate else: self.extrapolate = bool(extrapolate) n = self.t.shape[0] - self.k - 1 axis = normalize_axis_index(axis, self.c.ndim) # Note that the normalized axis is stored in the object. self.axis = axis if axis != 0: # roll the interpolation axis to be the first one in self.c # More specifically, the target shape for self.c is (n, ...), # and axis !=0 means that we have c.shape (..., n, ...) # ^ # axis self.c = np.moveaxis(self.c, axis, 0) if k < 0: raise ValueError("Spline order cannot be negative.") if self.t.ndim != 1: raise ValueError("Knot vector must be one-dimensional.") if n < self.k + 1: raise ValueError("Need at least %d knots for degree %d" % (2*k + 2, k)) if (np.diff(self.t) < 0).any(): raise ValueError("Knots must be in a non-decreasing order.") if len(np.unique(self.t[k:n+1])) < 2: raise ValueError("Need at least two internal knots.") if not np.isfinite(self.t).all(): raise ValueError("Knots should not have nans or infs.") if self.c.ndim < 1: raise ValueError("Coefficients must be at least 1-dimensional.") if self.c.shape[0] < n: raise ValueError("Knots, coefficients and degree are inconsistent.") dt = _get_dtype(self.c.dtype) self.c = np.ascontiguousarray(self.c, dtype=dt) @classmethod def construct_fast(cls, t, c, k, extrapolate=True, axis=0): """Construct a spline without making checks. Accepts same parameters as the regular constructor. Input arrays `t` and `c` must of correct shape and dtype. """ self = object.__new__(cls) self.t, self.c, self.k = t, c, k self.extrapolate = extrapolate self.axis = axis return self @property def tck(self): """Equivalent to ``(self.t, self.c, self.k)`` (read-only). """ return self.t, self.c, self.k @classmethod def basis_element(cls, t, extrapolate=True): """Return a B-spline basis element ``B(x | t[0], ..., t[k+1])``. Parameters ---------- t : ndarray, shape (k+2,) internal knots extrapolate : bool or 'periodic', optional whether to extrapolate beyond the base interval, ``t[0] .. t[k+1]``, or to return nans. If 'periodic', periodic extrapolation is used. Default is True. Returns ------- basis_element : callable A callable representing a B-spline basis element for the knot vector `t`. Notes ----- The degree of the B-spline, `k`, is inferred from the length of `t` as ``len(t)-2``. The knot vector is constructed by appending and prepending ``k+1`` elements to internal knots `t`. Examples -------- Construct a cubic B-spline: >>> import numpy as np >>> from scipy.interpolate import BSpline >>> b = BSpline.basis_element([0, 1, 2, 3, 4]) >>> k = b.k >>> b.t[k:-k] array([ 0., 1., 2., 3., 4.]) >>> k 3 Construct a quadratic B-spline on ``[0, 1, 1, 2]``, and compare to its explicit form: >>> t = [0, 1, 1, 2] >>> b = BSpline.basis_element(t) >>> def f(x): ... return np.where(x < 1, x*x, (2. - x)**2) >>> import matplotlib.pyplot as plt >>> fig, ax = plt.subplots() >>> x = np.linspace(0, 2, 51) >>> ax.plot(x, b(x), 'g', lw=3) >>> ax.plot(x, f(x), 'r', lw=8, alpha=0.4) >>> ax.grid(True) >>> plt.show() """ k = len(t) - 2 t = _as_float_array(t) t = np.r_[(t[0]-1,) * k, t, (t[-1]+1,) * k] c = np.zeros_like(t) c[k] = 1. return cls.construct_fast(t, c, k, extrapolate) @classmethod def design_matrix(cls, x, t, k, extrapolate=False): """ Returns a design matrix as a CSR format sparse array. Parameters ---------- x : array_like, shape (n,) Points to evaluate the spline at. t : array_like, shape (nt,) Sorted 1D array of knots. k : int B-spline degree. extrapolate : bool or 'periodic', optional Whether to extrapolate based on the first and last intervals or raise an error. If 'periodic', periodic extrapolation is used. Default is False. .. versionadded:: 1.10.0 Returns ------- design_matrix : `csr_array` object Sparse matrix in CSR format where each row contains all the basis elements of the input row (first row = basis elements of x[0], ..., last row = basis elements x[-1]). Examples -------- Construct a design matrix for a B-spline >>> from scipy.interpolate import make_interp_spline, BSpline >>> import numpy as np >>> x = np.linspace(0, np.pi * 2, 4) >>> y = np.sin(x) >>> k = 3 >>> bspl = make_interp_spline(x, y, k=k) >>> design_matrix = bspl.design_matrix(x, bspl.t, k) >>> design_matrix.toarray() [[1. , 0. , 0. , 0. ], [0.2962963 , 0.44444444, 0.22222222, 0.03703704], [0.03703704, 0.22222222, 0.44444444, 0.2962963 ], [0. , 0. , 0. , 1. ]] Construct a design matrix for some vector of knots >>> k = 2 >>> t = [-1, 0, 1, 2, 3, 4, 5, 6] >>> x = [1, 2, 3, 4] >>> design_matrix = BSpline.design_matrix(x, t, k).toarray() >>> design_matrix [[0.5, 0.5, 0. , 0. , 0. ], [0. , 0.5, 0.5, 0. , 0. ], [0. , 0. , 0.5, 0.5, 0. ], [0. , 0. , 0. , 0.5, 0.5]] This result is equivalent to the one created in the sparse format >>> c = np.eye(len(t) - k - 1) >>> design_matrix_gh = BSpline(t, c, k)(x) >>> np.allclose(design_matrix, design_matrix_gh, atol=1e-14) True Notes ----- .. versionadded:: 1.8.0 In each row of the design matrix all the basis elements are evaluated at the certain point (first row - x[0], ..., last row - x[-1]). `nt` is a length of the vector of knots: as far as there are `nt - k - 1` basis elements, `nt` should be not less than `2 * k + 2` to have at least `k + 1` basis element. Out of bounds `x` raises a ValueError. """ x = _as_float_array(x, True) t = _as_float_array(t, True) if extrapolate != 'periodic': extrapolate = bool(extrapolate) if k < 0: raise ValueError("Spline order cannot be negative.") if t.ndim != 1 or np.any(t[1:] < t[:-1]): raise ValueError(f"Expect t to be a 1-D sorted array_like, but " f"got t={t}.") # There are `nt - k - 1` basis elements in a BSpline built on the # vector of knots with length `nt`, so to have at least `k + 1` basis # elements we need to have at least `2 * k + 2` elements in the vector # of knots. if len(t) < 2 * k + 2: raise ValueError(f"Length t is not enough for k={k}.") if extrapolate == 'periodic': # With periodic extrapolation we map x to the segment # [t[k], t[n]]. n = t.size - k - 1 x = t[k] + (x - t[k]) % (t[n] - t[k]) extrapolate = False elif not extrapolate and ( (min(x) < t[k]) or (max(x) > t[t.shape[0] - k - 1]) ): # Checks from `find_interval` function raise ValueError(f'Out of bounds w/ x = {x}.') # Compute number of non-zeros of final CSR array in order to determine # the dtype of indices and indptr of the CSR array. n = x.shape[0] nnz = n * (k + 1) if nnz < np.iinfo(np.int32).max: int_dtype = np.int32 else: int_dtype = np.int64 # Preallocate indptr and indices indices = np.empty(n * (k + 1), dtype=int_dtype) indptr = np.arange(0, (n + 1) * (k + 1), k + 1, dtype=int_dtype) # indptr is not passed to Cython as it is already fully computed data, indices = _bspl._make_design_matrix( x, t, k, extrapolate, indices ) return csr_array( (data, indices, indptr), shape=(x.shape[0], t.shape[0] - k - 1) ) def __call__(self, x, nu=0, extrapolate=None): """ Evaluate a spline function. Parameters ---------- x : array_like points to evaluate the spline at. nu : int, optional derivative to evaluate (default is 0). extrapolate : bool or 'periodic', optional whether to extrapolate based on the first and last intervals or return nans. If 'periodic', periodic extrapolation is used. Default is `self.extrapolate`. Returns ------- y : array_like Shape is determined by replacing the interpolation axis in the coefficient array with the shape of `x`. """ if extrapolate is None: extrapolate = self.extrapolate x = np.asarray(x) x_shape, x_ndim = x.shape, x.ndim x = np.ascontiguousarray(x.ravel(), dtype=np.float_) # With periodic extrapolation we map x to the segment # [self.t[k], self.t[n]]. if extrapolate == 'periodic': n = self.t.size - self.k - 1 x = self.t[self.k] + (x - self.t[self.k]) % (self.t[n] - self.t[self.k]) extrapolate = False out = np.empty((len(x), prod(self.c.shape[1:])), dtype=self.c.dtype) self._ensure_c_contiguous() self._evaluate(x, nu, extrapolate, out) out = out.reshape(x_shape + self.c.shape[1:]) if self.axis != 0: # transpose to move the calculated values to the interpolation axis l = list(range(out.ndim)) l = l[x_ndim:x_ndim+self.axis] + l[:x_ndim] + l[x_ndim+self.axis:] out = out.transpose(l) return out def _evaluate(self, xp, nu, extrapolate, out): _bspl.evaluate_spline(self.t, self.c.reshape(self.c.shape[0], -1), self.k, xp, nu, extrapolate, out) def _ensure_c_contiguous(self): """ c and t may be modified by the user. The Cython code expects that they are C contiguous. """ if not self.t.flags.c_contiguous: self.t = self.t.copy() if not self.c.flags.c_contiguous: self.c = self.c.copy() def derivative(self, nu=1): """Return a B-spline representing the derivative. Parameters ---------- nu : int, optional Derivative order. Default is 1. Returns ------- b : BSpline object A new instance representing the derivative. See Also -------- splder, splantider """ c = self.c # pad the c array if needed ct = len(self.t) - len(c) if ct > 0: c = np.r_[c, np.zeros((ct,) + c.shape[1:])] tck = _fitpack_impl.splder((self.t, c, self.k), nu) return self.construct_fast(*tck, extrapolate=self.extrapolate, axis=self.axis) def antiderivative(self, nu=1): """Return a B-spline representing the antiderivative. Parameters ---------- nu : int, optional Antiderivative order. Default is 1. Returns ------- b : BSpline object A new instance representing the antiderivative. Notes ----- If antiderivative is computed and ``self.extrapolate='periodic'``, it will be set to False for the returned instance. This is done because the antiderivative is no longer periodic and its correct evaluation outside of the initially given x interval is difficult. See Also -------- splder, splantider """ c = self.c # pad the c array if needed ct = len(self.t) - len(c) if ct > 0: c = np.r_[c, np.zeros((ct,) + c.shape[1:])] tck = _fitpack_impl.splantider((self.t, c, self.k), nu) if self.extrapolate == 'periodic': extrapolate = False else: extrapolate = self.extrapolate return self.construct_fast(*tck, extrapolate=extrapolate, axis=self.axis) def integrate(self, a, b, extrapolate=None): """Compute a definite integral of the spline. Parameters ---------- a : float Lower limit of integration. b : float Upper limit of integration. extrapolate : bool or 'periodic', optional whether to extrapolate beyond the base interval, ``t[k] .. t[-k-1]``, or take the spline to be zero outside of the base interval. If 'periodic', periodic extrapolation is used. If None (default), use `self.extrapolate`. Returns ------- I : array_like Definite integral of the spline over the interval ``[a, b]``. Examples -------- Construct the linear spline ``x if x < 1 else 2 - x`` on the base interval :math:`[0, 2]`, and integrate it >>> from scipy.interpolate import BSpline >>> b = BSpline.basis_element([0, 1, 2]) >>> b.integrate(0, 1) array(0.5) If the integration limits are outside of the base interval, the result is controlled by the `extrapolate` parameter >>> b.integrate(-1, 1) array(0.0) >>> b.integrate(-1, 1, extrapolate=False) array(0.5) >>> import matplotlib.pyplot as plt >>> fig, ax = plt.subplots() >>> ax.grid(True) >>> ax.axvline(0, c='r', lw=5, alpha=0.5) # base interval >>> ax.axvline(2, c='r', lw=5, alpha=0.5) >>> xx = [-1, 1, 2] >>> ax.plot(xx, b(xx)) >>> plt.show() """ if extrapolate is None: extrapolate = self.extrapolate # Prepare self.t and self.c. self._ensure_c_contiguous() # Swap integration bounds if needed. sign = 1 if b < a: a, b = b, a sign = -1 n = self.t.size - self.k - 1 if extrapolate != "periodic" and not extrapolate: # Shrink the integration interval, if needed. a = max(a, self.t[self.k]) b = min(b, self.t[n]) if self.c.ndim == 1: # Fast path: use FITPACK's routine # (cf _fitpack_impl.splint). integral = _fitpack_impl.splint(a, b, self.tck) return integral * sign out = np.empty((2, prod(self.c.shape[1:])), dtype=self.c.dtype) # Compute the antiderivative. c = self.c ct = len(self.t) - len(c) if ct > 0: c = np.r_[c, np.zeros((ct,) + c.shape[1:])] ta, ca, ka = _fitpack_impl.splantider((self.t, c, self.k), 1) if extrapolate == 'periodic': # Split the integral into the part over period (can be several # of them) and the remaining part. ts, te = self.t[self.k], self.t[n] period = te - ts interval = b - a n_periods, left = divmod(interval, period) if n_periods > 0: # Evaluate the difference of antiderivatives. x = np.asarray([ts, te], dtype=np.float_) _bspl.evaluate_spline(ta, ca.reshape(ca.shape[0], -1), ka, x, 0, False, out) integral = out[1] - out[0] integral *= n_periods else: integral = np.zeros((1, prod(self.c.shape[1:])), dtype=self.c.dtype) # Map a to [ts, te], b is always a + left. a = ts + (a - ts) % period b = a + left # If b <= te then we need to integrate over [a, b], otherwise # over [a, te] and from xs to what is remained. if b <= te: x = np.asarray([a, b], dtype=np.float_) _bspl.evaluate_spline(ta, ca.reshape(ca.shape[0], -1), ka, x, 0, False, out) integral += out[1] - out[0] else: x = np.asarray([a, te], dtype=np.float_) _bspl.evaluate_spline(ta, ca.reshape(ca.shape[0], -1), ka, x, 0, False, out) integral += out[1] - out[0] x = np.asarray([ts, ts + b - te], dtype=np.float_) _bspl.evaluate_spline(ta, ca.reshape(ca.shape[0], -1), ka, x, 0, False, out) integral += out[1] - out[0] else: # Evaluate the difference of antiderivatives. x = np.asarray([a, b], dtype=np.float_) _bspl.evaluate_spline(ta, ca.reshape(ca.shape[0], -1), ka, x, 0, extrapolate, out) integral = out[1] - out[0] integral *= sign return integral.reshape(ca.shape[1:]) @classmethod def from_power_basis(cls, pp, bc_type='not-a-knot'): r""" Construct a polynomial in the B-spline basis from a piecewise polynomial in the power basis. For now, accepts ``CubicSpline`` instances only. Parameters ---------- pp : CubicSpline A piecewise polynomial in the power basis, as created by ``CubicSpline`` bc_type : string, optional Boundary condition type as in ``CubicSpline``: one of the ``not-a-knot``, ``natural``, ``clamped``, or ``periodic``. Necessary for construction an instance of ``BSpline`` class. Default is ``not-a-knot``. Returns ------- b : BSpline object A new instance representing the initial polynomial in the B-spline basis. Notes ----- .. versionadded:: 1.8.0 Accepts only ``CubicSpline`` instances for now. The algorithm follows from differentiation the Marsden's identity [1]: each of coefficients of spline interpolation function in the B-spline basis is computed as follows: .. math:: c_j = \sum_{m=0}^{k} \frac{(k-m)!}{k!} c_{m,i} (-1)^{k-m} D^m p_{j,k}(x_i) :math:`c_{m, i}` - a coefficient of CubicSpline, :math:`D^m p_{j, k}(x_i)` - an m-th defivative of a dual polynomial in :math:`x_i`. ``k`` always equals 3 for now. First ``n - 2`` coefficients are computed in :math:`x_i = x_j`, e.g. .. math:: c_1 = \sum_{m=0}^{k} \frac{(k-1)!}{k!} c_{m,1} D^m p_{j,3}(x_1) Last ``nod + 2`` coefficients are computed in ``x[-2]``, ``nod`` - number of derivatives at the ends. For example, consider :math:`x = [0, 1, 2, 3, 4]`, :math:`y = [1, 1, 1, 1, 1]` and bc_type = ``natural`` The coefficients of CubicSpline in the power basis: :math:`[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 1, 1, 1, 1]]` The knot vector: :math:`t = [0, 0, 0, 0, 1, 2, 3, 4, 4, 4, 4]` In this case .. math:: c_j = \frac{0!}{k!} c_{3, i} k! = c_{3, i} = 1,~j = 0, ..., 6 References ---------- .. [1] Tom Lyche and Knut Morken, Spline Methods, 2005, Section 3.1.2 """ from ._cubic import CubicSpline if not isinstance(pp, CubicSpline): raise NotImplementedError("Only CubicSpline objects are accepted" "for now. Got %s instead." % type(pp)) x = pp.x coef = pp.c k = pp.c.shape[0] - 1 n = x.shape[0] if bc_type == 'not-a-knot': t = _not_a_knot(x, k) elif bc_type == 'natural' or bc_type == 'clamped': t = _augknt(x, k) elif bc_type == 'periodic': t = _periodic_knots(x, k) else: raise TypeError('Unknown boundary condition: %s' % bc_type) nod = t.shape[0] - (n + k + 1) # number of derivatives at the ends c = np.zeros(n + nod, dtype=pp.c.dtype) for m in range(k + 1): for i in range(n - 2): c[i] += poch(k + 1, -m) * coef[m, i]\ * np.power(-1, k - m)\ * _diff_dual_poly(i, k, x[i], m, t) for j in range(n - 2, n + nod): c[j] += poch(k + 1, -m) * coef[m, n - 2]\ * np.power(-1, k - m)\ * _diff_dual_poly(j, k, x[n - 2], m, t) return cls.construct_fast(t, c, k, pp.extrapolate, pp.axis) ################################# # Interpolating spline helpers # ################################# def _not_a_knot(x, k): """Given data x, construct the knot vector w/ not-a-knot BC. cf de Boor, XIII(12).""" x = np.asarray(x) if k % 2 != 1: raise ValueError("Odd degree for now only. Got %s." % k) m = (k - 1) // 2 t = x[m+1:-m-1] t = np.r_[(x[0],)*(k+1), t, (x[-1],)*(k+1)] return t def _augknt(x, k): """Construct a knot vector appropriate for the order-k interpolation.""" return np.r_[(x[0],)*k, x, (x[-1],)*k] def _convert_string_aliases(deriv, target_shape): if isinstance(deriv, str): if deriv == "clamped": deriv = [(1, np.zeros(target_shape))] elif deriv == "natural": deriv = [(2, np.zeros(target_shape))] else: raise ValueError("Unknown boundary condition : %s" % deriv) return deriv def _process_deriv_spec(deriv): if deriv is not None: try: ords, vals = zip(*deriv) except TypeError as e: msg = ("Derivatives, `bc_type`, should be specified as a pair of " "iterables of pairs of (order, value).") raise ValueError(msg) from e else: ords, vals = [], [] return np.atleast_1d(ords, vals) def _woodbury_algorithm(A, ur, ll, b, k): ''' Solve a cyclic banded linear system with upper right and lower blocks of size ``(k-1) / 2`` using the Woodbury formula Parameters ---------- A : 2-D array, shape(k, n) Matrix of diagonals of original matrix(see ``solve_banded`` documentation). ur : 2-D array, shape(bs, bs) Upper right block matrix. ll : 2-D array, shape(bs, bs) Lower left block matrix. b : 1-D array, shape(n,) Vector of constant terms of the system of linear equations. k : int B-spline degree. Returns ------- c : 1-D array, shape(n,) Solution of the original system of linear equations. Notes ----- This algorithm works only for systems with banded matrix A plus a correction term U @ V.T, where the matrix U @ V.T gives upper right and lower left block of A The system is solved with the following steps: 1. New systems of linear equations are constructed: A @ z_i = u_i, u_i - columnn vector of U, i = 1, ..., k - 1 2. Matrix Z is formed from vectors z_i: Z = [ z_1 | z_2 | ... | z_{k - 1} ] 3. Matrix H = (1 + V.T @ Z)^{-1} 4. The system A' @ y = b is solved 5. x = y - Z @ (H @ V.T @ y) Also, ``n`` should be greater than ``k``, otherwise corner block elements will intersect with diagonals. Examples -------- Consider the case of n = 8, k = 5 (size of blocks - 2 x 2). The matrix of a system: U: V: x x x * * a b a b 0 0 0 0 1 0 x x x x * * c 0 c 0 0 0 0 0 1 x x x x x * * 0 0 0 0 0 0 0 0 * x x x x x * 0 0 0 0 0 0 0 0 * * x x x x x 0 0 0 0 0 0 0 0 d * * x x x x 0 0 d 0 1 0 0 0 e f * * x x x 0 0 e f 0 1 0 0 References ---------- .. [1] William H. Press, Saul A. Teukolsky, William T. Vetterling and Brian P. Flannery, Numerical Recipes, 2007, Section 2.7.3 ''' k_mod = k - k % 2 bs = int((k - 1) / 2) + (k + 1) % 2 n = A.shape[1] + 1 U = np.zeros((n - 1, k_mod)) VT = np.zeros((k_mod, n - 1)) # V transpose # upper right block U[:bs, :bs] = ur VT[np.arange(bs), np.arange(bs) - bs] = 1 # lower left block U[-bs:, -bs:] = ll VT[np.arange(bs) - bs, np.arange(bs)] = 1 Z = solve_banded((bs, bs), A, U) H = solve(np.identity(k_mod) + VT @ Z, np.identity(k_mod)) y = solve_banded((bs, bs), A, b) c = y - Z @ (H @ (VT @ y)) return c def _periodic_knots(x, k): ''' returns vector of nodes on circle ''' xc = np.copy(x) n = len(xc) if k % 2 == 0: dx = np.diff(xc) xc[1: -1] -= dx[:-1] / 2 dx = np.diff(xc) t = np.zeros(n + 2 * k) t[k: -k] = xc for i in range(0, k): # filling first `k` elements in descending order t[k - i - 1] = t[k - i] - dx[-(i % (n - 1)) - 1] # filling last `k` elements in ascending order t[-k + i] = t[-k + i - 1] + dx[i % (n - 1)] return t def _make_interp_per_full_matr(x, y, t, k): ''' Returns a solution of a system for B-spline interpolation with periodic boundary conditions. First ``k - 1`` rows of matrix are condtions of periodicity (continuity of ``k - 1`` derivatives at the boundary points). Last ``n`` rows are interpolation conditions. RHS is ``k - 1`` zeros and ``n`` ordinates in this case. Parameters ---------- x : 1-D array, shape (n,) Values of x - coordinate of a given set of points. y : 1-D array, shape (n,) Values of y - coordinate of a given set of points. t : 1-D array, shape(n+2*k,) Vector of knots. k : int The maximum degree of spline Returns ------- c : 1-D array, shape (n+k-1,) B-spline coefficients Notes ----- ``t`` is supposed to be taken on circle. ''' x, y, t = map(np.asarray, (x, y, t)) n = x.size # LHS: the collocation matrix + derivatives at edges matr = np.zeros((n + k - 1, n + k - 1)) # derivatives at x[0] and x[-1]: for i in range(k - 1): bb = _bspl.evaluate_all_bspl(t, k, x[0], k, nu=i + 1) matr[i, : k + 1] += bb bb = _bspl.evaluate_all_bspl(t, k, x[-1], n + k - 1, nu=i + 1)[:-1] matr[i, -k:] -= bb # collocation matrix for i in range(n): xval = x[i] # find interval if xval == t[k]: left = k else: left = np.searchsorted(t, xval) - 1 # fill a row bb = _bspl.evaluate_all_bspl(t, k, xval, left) matr[i + k - 1, left-k:left+1] = bb # RHS b = np.r_[[0] * (k - 1), y] c = solve(matr, b) return c def _make_periodic_spline(x, y, t, k, axis): ''' Compute the (coefficients of) interpolating B-spline with periodic boundary conditions. Parameters ---------- x : array_like, shape (n,) Abscissas. y : array_like, shape (n,) Ordinates. k : int B-spline degree. t : array_like, shape (n + 2 * k,). Knots taken on a circle, ``k`` on the left and ``k`` on the right of the vector ``x``. Returns ------- b : a BSpline object of the degree ``k`` and with knots ``t``. Notes ----- The original system is formed by ``n + k - 1`` equations where the first ``k - 1`` of them stand for the ``k - 1`` derivatives continuity on the edges while the other equations correspond to an interpolating case (matching all the input points). Due to a special form of knot vector, it can be proved that in the original system the first and last ``k`` coefficients of a spline function are the same, respectively. It follows from the fact that all ``k - 1`` derivatives are equal term by term at ends and that the matrix of the original system of linear equations is non-degenerate. So, we can reduce the number of equations to ``n - 1`` (first ``k - 1`` equations could be reduced). Another trick of this implementation is cyclic shift of values of B-splines due to equality of ``k`` unknown coefficients. With this we can receive matrix of the system with upper right and lower left blocks, and ``k`` diagonals. It allows to use Woodbury formula to optimize the computations. ''' n = y.shape[0] extradim = prod(y.shape[1:]) y_new = y.reshape(n, extradim) c = np.zeros((n + k - 1, extradim)) # n <= k case is solved with full matrix if n <= k: for i in range(extradim): c[:, i] = _make_interp_per_full_matr(x, y_new[:, i], t, k) c = np.ascontiguousarray(c.reshape((n + k - 1,) + y.shape[1:])) return BSpline.construct_fast(t, c, k, extrapolate='periodic', axis=axis) nt = len(t) - k - 1 # size of block elements kul = int(k / 2) # kl = ku = k ab = np.zeros((3 * k + 1, nt), dtype=np.float_, order='F') # upper right and lower left blocks ur = np.zeros((kul, kul)) ll = np.zeros_like(ur) # `offset` is made to shift all the non-zero elements to the end of the # matrix _bspl._colloc(x, t, k, ab, offset=k) # remove zeros before the matrix ab = ab[-k - (k + 1) % 2:, :] # The least elements in rows (except repetitions) are diagonals # of block matrices. Upper right matrix is an upper triangular # matrix while lower left is a lower triangular one. for i in range(kul): ur += np.diag(ab[-i - 1, i: kul], k=i) ll += np.diag(ab[i, -kul - (k % 2): n - 1 + 2 * kul - i], k=-i) # remove elements that occur in the last point # (first and last points are equivalent) A = ab[:, kul: -k + kul] for i in range(extradim): cc = _woodbury_algorithm(A, ur, ll, y_new[:, i][:-1], k) c[:, i] = np.concatenate((cc[-kul:], cc, cc[:kul + k % 2])) c = np.ascontiguousarray(c.reshape((n + k - 1,) + y.shape[1:])) return BSpline.construct_fast(t, c, k, extrapolate='periodic', axis=axis) def make_interp_spline(x, y, k=3, t=None, bc_type=None, axis=0, check_finite=True): """Compute the (coefficients of) interpolating B-spline. Parameters ---------- x : array_like, shape (n,) Abscissas. y : array_like, shape (n, ...) Ordinates. k : int, optional B-spline degree. Default is cubic, ``k = 3``. t : array_like, shape (nt + k + 1,), optional. Knots. The number of knots needs to agree with the number of data points and the number of derivatives at the edges. Specifically, ``nt - n`` must equal ``len(deriv_l) + len(deriv_r)``. bc_type : 2-tuple or None Boundary conditions. Default is None, which means choosing the boundary conditions automatically. Otherwise, it must be a length-two tuple where the first element (``deriv_l``) sets the boundary conditions at ``x[0]`` and the second element (``deriv_r``) sets the boundary conditions at ``x[-1]``. Each of these must be an iterable of pairs ``(order, value)`` which gives the values of derivatives of specified orders at the given edge of the interpolation interval. Alternatively, the following string aliases are recognized: * ``"clamped"``: The first derivatives at the ends are zero. This is equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``. * ``"natural"``: The second derivatives at ends are zero. This is equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``. * ``"not-a-knot"`` (default): The first and second segments are the same polynomial. This is equivalent to having ``bc_type=None``. * ``"periodic"``: The values and the first ``k-1`` derivatives at the ends are equivalent. axis : int, optional Interpolation axis. Default is 0. check_finite : bool, optional Whether to check that the input arrays contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. Default is True. Returns ------- b : a BSpline object of the degree ``k`` and with knots ``t``. Examples -------- Use cubic interpolation on Chebyshev nodes: >>> import numpy as np >>> import matplotlib.pyplot as plt >>> def cheb_nodes(N): ... jj = 2.*np.arange(N) + 1 ... x = np.cos(np.pi * jj / 2 / N)[::-1] ... return x >>> x = cheb_nodes(20) >>> y = np.sqrt(1 - x**2) >>> from scipy.interpolate import BSpline, make_interp_spline >>> b = make_interp_spline(x, y) >>> np.allclose(b(x), y) True Note that the default is a cubic spline with a not-a-knot boundary condition >>> b.k 3 Here we use a 'natural' spline, with zero 2nd derivatives at edges: >>> l, r = [(2, 0.0)], [(2, 0.0)] >>> b_n = make_interp_spline(x, y, bc_type=(l, r)) # or, bc_type="natural" >>> np.allclose(b_n(x), y) True >>> x0, x1 = x[0], x[-1] >>> np.allclose([b_n(x0, 2), b_n(x1, 2)], [0, 0]) True Interpolation of parametric curves is also supported. As an example, we compute a discretization of a snail curve in polar coordinates >>> phi = np.linspace(0, 2.*np.pi, 40) >>> r = 0.3 + np.cos(phi) >>> x, y = r*np.cos(phi), r*np.sin(phi) # convert to Cartesian coordinates Build an interpolating curve, parameterizing it by the angle >>> spl = make_interp_spline(phi, np.c_[x, y]) Evaluate the interpolant on a finer grid (note that we transpose the result to unpack it into a pair of x- and y-arrays) >>> phi_new = np.linspace(0, 2.*np.pi, 100) >>> x_new, y_new = spl(phi_new).T Plot the result >>> plt.plot(x, y, 'o') >>> plt.plot(x_new, y_new, '-') >>> plt.show() Build a B-spline curve with 2 dimensional y >>> x = np.linspace(0, 2*np.pi, 10) >>> y = np.array([np.sin(x), np.cos(x)]) Periodic condition is satisfied because y coordinates of points on the ends are equivalent >>> ax = plt.axes(projection='3d') >>> xx = np.linspace(0, 2*np.pi, 100) >>> bspl = make_interp_spline(x, y, k=5, bc_type='periodic', axis=1) >>> ax.plot3D(xx, *bspl(xx)) >>> ax.scatter3D(x, *y, color='red') >>> plt.show() See Also -------- BSpline : base class representing the B-spline objects CubicSpline : a cubic spline in the polynomial basis make_lsq_spline : a similar factory function for spline fitting UnivariateSpline : a wrapper over FITPACK spline fitting routines splrep : a wrapper over FITPACK spline fitting routines """ # convert string aliases for the boundary conditions if bc_type is None or bc_type == 'not-a-knot' or bc_type == 'periodic': deriv_l, deriv_r = None, None elif isinstance(bc_type, str): deriv_l, deriv_r = bc_type, bc_type else: try: deriv_l, deriv_r = bc_type except TypeError as e: raise ValueError("Unknown boundary condition: %s" % bc_type) from e y = np.asarray(y) axis = normalize_axis_index(axis, y.ndim) x = _as_float_array(x, check_finite) y = _as_float_array(y, check_finite) y = np.moveaxis(y, axis, 0) # now internally interp axis is zero # sanity check the input if bc_type == 'periodic' and not np.allclose(y[0], y[-1], atol=1e-15): raise ValueError("First and last points does not match while " "periodic case expected") if x.size != y.shape[0]: raise ValueError('Shapes of x {} and y {} are incompatible' .format(x.shape, y.shape)) if np.any(x[1:] == x[:-1]): raise ValueError("Expect x to not have duplicates") if x.ndim != 1 or np.any(x[1:] < x[:-1]): raise ValueError("Expect x to be a 1D strictly increasing sequence.") # special-case k=0 right away if k == 0: if any(_ is not None for _ in (t, deriv_l, deriv_r)): raise ValueError("Too much info for k=0: t and bc_type can only " "be None.") t = np.r_[x, x[-1]] c = np.asarray(y) c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype)) return BSpline.construct_fast(t, c, k, axis=axis) # special-case k=1 (e.g., Lyche and Morken, Eq.(2.16)) if k == 1 and t is None: if not (deriv_l is None and deriv_r is None): raise ValueError("Too much info for k=1: bc_type can only be None.") t = np.r_[x[0], x, x[-1]] c = np.asarray(y) c = np.ascontiguousarray(c, dtype=_get_dtype(c.dtype)) return BSpline.construct_fast(t, c, k, axis=axis) k = operator.index(k) if bc_type == 'periodic' and t is not None: raise NotImplementedError("For periodic case t is constructed " "automatically and can not be passed manually") # come up with a sensible knot vector, if needed if t is None: if deriv_l is None and deriv_r is None: if bc_type == 'periodic': t = _periodic_knots(x, k) elif k == 2: # OK, it's a bit ad hoc: Greville sites + omit # 2nd and 2nd-to-last points, a la not-a-knot t = (x[1:] + x[:-1]) / 2. t = np.r_[(x[0],)*(k+1), t[1:-1], (x[-1],)*(k+1)] else: t = _not_a_knot(x, k) else: t = _augknt(x, k) t = _as_float_array(t, check_finite) if k < 0: raise ValueError("Expect non-negative k.") if t.ndim != 1 or np.any(t[1:] < t[:-1]): raise ValueError("Expect t to be a 1-D sorted array_like.") if t.size < x.size + k + 1: raise ValueError('Got %d knots, need at least %d.' % (t.size, x.size + k + 1)) if (x[0] < t[k]) or (x[-1] > t[-k]): raise ValueError('Out of bounds w/ x = %s.' % x) if bc_type == 'periodic': return _make_periodic_spline(x, y, t, k, axis) # Here : deriv_l, r = [(nu, value), ...] deriv_l = _convert_string_aliases(deriv_l, y.shape[1:]) deriv_l_ords, deriv_l_vals = _process_deriv_spec(deriv_l) nleft = deriv_l_ords.shape[0] deriv_r = _convert_string_aliases(deriv_r, y.shape[1:]) deriv_r_ords, deriv_r_vals = _process_deriv_spec(deriv_r) nright = deriv_r_ords.shape[0] # have `n` conditions for `nt` coefficients; need nt-n derivatives n = x.size nt = t.size - k - 1 if nt - n != nleft + nright: raise ValueError("The number of derivatives at boundaries does not " "match: expected %s, got %s+%s" % (nt-n, nleft, nright)) # bail out if the `y` array is zero-sized if y.size == 0: c = np.zeros((nt,) + y.shape[1:], dtype=float) return BSpline.construct_fast(t, c, k, axis=axis) # set up the LHS: the collocation matrix + derivatives at boundaries kl = ku = k ab = np.zeros((2*kl + ku + 1, nt), dtype=np.float_, order='F') _bspl._colloc(x, t, k, ab, offset=nleft) if nleft > 0: _bspl._handle_lhs_derivatives(t, k, x[0], ab, kl, ku, deriv_l_ords) if nright > 0: _bspl._handle_lhs_derivatives(t, k, x[-1], ab, kl, ku, deriv_r_ords, offset=nt-nright) # set up the RHS: values to interpolate (+ derivative values, if any) extradim = prod(y.shape[1:]) rhs = np.empty((nt, extradim), dtype=y.dtype) if nleft > 0: rhs[:nleft] = deriv_l_vals.reshape(-1, extradim) rhs[nleft:nt - nright] = y.reshape(-1, extradim) if nright > 0: rhs[nt - nright:] = deriv_r_vals.reshape(-1, extradim) # solve Ab @ x = rhs; this is the relevant part of linalg.solve_banded if check_finite: ab, rhs = map(np.asarray_chkfinite, (ab, rhs)) gbsv, = get_lapack_funcs(('gbsv',), (ab, rhs)) lu, piv, c, info = gbsv(kl, ku, ab, rhs, overwrite_ab=True, overwrite_b=True) if info > 0: raise LinAlgError("Collocation matrix is singular.") elif info < 0: raise ValueError('illegal value in %d-th argument of internal gbsv' % -info) c = np.ascontiguousarray(c.reshape((nt,) + y.shape[1:])) return BSpline.construct_fast(t, c, k, axis=axis) def make_lsq_spline(x, y, t, k=3, w=None, axis=0, check_finite=True): r"""Compute the (coefficients of) an LSQ (Least SQuared) based fitting B-spline. The result is a linear combination .. math:: S(x) = \sum_j c_j B_j(x; t) of the B-spline basis elements, :math:`B_j(x; t)`, which minimizes .. math:: \sum_{j} \left( w_j \times (S(x_j) - y_j) \right)^2 Parameters ---------- x : array_like, shape (m,) Abscissas. y : array_like, shape (m, ...) Ordinates. t : array_like, shape (n + k + 1,). Knots. Knots and data points must satisfy Schoenberg-Whitney conditions. k : int, optional B-spline degree. Default is cubic, ``k = 3``. w : array_like, shape (m,), optional Weights for spline fitting. Must be positive. If ``None``, then weights are all equal. Default is ``None``. axis : int, optional Interpolation axis. Default is zero. check_finite : bool, optional Whether to check that the input arrays contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. Default is True. Returns ------- b : a BSpline object of the degree ``k`` with knots ``t``. Notes ----- The number of data points must be larger than the spline degree ``k``. Knots ``t`` must satisfy the Schoenberg-Whitney conditions, i.e., there must be a subset of data points ``x[j]`` such that ``t[j] < x[j] < t[j+k+1]``, for ``j=0, 1,...,n-k-2``. Examples -------- Generate some noisy data: >>> import numpy as np >>> import matplotlib.pyplot as plt >>> rng = np.random.default_rng() >>> x = np.linspace(-3, 3, 50) >>> y = np.exp(-x**2) + 0.1 * rng.standard_normal(50) Now fit a smoothing cubic spline with a pre-defined internal knots. Here we make the knot vector (k+1)-regular by adding boundary knots: >>> from scipy.interpolate import make_lsq_spline, BSpline >>> t = [-1, 0, 1] >>> k = 3 >>> t = np.r_[(x[0],)*(k+1), ... t, ... (x[-1],)*(k+1)] >>> spl = make_lsq_spline(x, y, t, k) For comparison, we also construct an interpolating spline for the same set of data: >>> from scipy.interpolate import make_interp_spline >>> spl_i = make_interp_spline(x, y) Plot both: >>> xs = np.linspace(-3, 3, 100) >>> plt.plot(x, y, 'ro', ms=5) >>> plt.plot(xs, spl(xs), 'g-', lw=3, label='LSQ spline') >>> plt.plot(xs, spl_i(xs), 'b-', lw=3, alpha=0.7, label='interp spline') >>> plt.legend(loc='best') >>> plt.show() **NaN handling**: If the input arrays contain ``nan`` values, the result is not useful since the underlying spline fitting routines cannot deal with ``nan``. A workaround is to use zero weights for not-a-number data points: >>> y[8] = np.nan >>> w = np.isnan(y) >>> y[w] = 0. >>> tck = make_lsq_spline(x, y, t, w=~w) Notice the need to replace a ``nan`` by a numerical value (precise value does not matter as long as the corresponding weight is zero.) See Also -------- BSpline : base class representing the B-spline objects make_interp_spline : a similar factory function for interpolating splines LSQUnivariateSpline : a FITPACK-based spline fitting routine splrep : a FITPACK-based fitting routine """ x = _as_float_array(x, check_finite) y = _as_float_array(y, check_finite) t = _as_float_array(t, check_finite) if w is not None: w = _as_float_array(w, check_finite) else: w = np.ones_like(x) k = operator.index(k) axis = normalize_axis_index(axis, y.ndim) y = np.moveaxis(y, axis, 0) # now internally interp axis is zero if x.ndim != 1 or np.any(x[1:] - x[:-1] <= 0): raise ValueError("Expect x to be a 1-D sorted array_like.") if x.shape[0] < k+1: raise ValueError("Need more x points.") if k < 0: raise ValueError("Expect non-negative k.") if t.ndim != 1 or np.any(t[1:] - t[:-1] < 0): raise ValueError("Expect t to be a 1-D sorted array_like.") if x.size != y.shape[0]: raise ValueError('Shapes of x {} and y {} are incompatible' .format(x.shape, y.shape)) if k > 0 and np.any((x < t[k]) | (x > t[-k])): raise ValueError('Out of bounds w/ x = %s.' % x) if x.size != w.size: raise ValueError('Shapes of x {} and w {} are incompatible' .format(x.shape, w.shape)) # number of coefficients n = t.size - k - 1 # construct A.T @ A and rhs with A the collocation matrix, and # rhs = A.T @ y for solving the LSQ problem ``A.T @ A @ c = A.T @ y`` lower = True extradim = prod(y.shape[1:]) ab = np.zeros((k+1, n), dtype=np.float_, order='F') rhs = np.zeros((n, extradim), dtype=y.dtype, order='F') _bspl._norm_eq_lsq(x, t, k, y.reshape(-1, extradim), w, ab, rhs) rhs = rhs.reshape((n,) + y.shape[1:]) # have observation matrix & rhs, can solve the LSQ problem cho_decomp = cholesky_banded(ab, overwrite_ab=True, lower=lower, check_finite=check_finite) c = cho_solve_banded((cho_decomp, lower), rhs, overwrite_b=True, check_finite=check_finite) c = np.ascontiguousarray(c) return BSpline.construct_fast(t, c, k, axis=axis) ############################# # Smoothing spline helpers # ############################# def _compute_optimal_gcv_parameter(X, wE, y, w): """ Returns an optimal regularization parameter from the GCV criteria [1]. Parameters ---------- X : array, shape (5, n) 5 bands of the design matrix ``X`` stored in LAPACK banded storage. wE : array, shape (5, n) 5 bands of the penalty matrix :math:`W^{-1} E` stored in LAPACK banded storage. y : array, shape (n,) Ordinates. w : array, shape (n,) Vector of weights. Returns ------- lam : float An optimal from the GCV criteria point of view regularization parameter. Notes ----- No checks are performed. References ---------- .. [1] G. Wahba, "Estimating the smoothing parameter" in Spline models for observational data, Philadelphia, Pennsylvania: Society for Industrial and Applied Mathematics, 1990, pp. 45-65. :doi:`10.1137/1.9781611970128` """ def compute_banded_symmetric_XT_W_Y(X, w, Y): """ Assuming that the product :math:`X^T W Y` is symmetric and both ``X`` and ``Y`` are 5-banded, compute the unique bands of the product. Parameters ---------- X : array, shape (5, n) 5 bands of the matrix ``X`` stored in LAPACK banded storage. w : array, shape (n,) Array of weights Y : array, shape (5, n) 5 bands of the matrix ``Y`` stored in LAPACK banded storage. Returns ------- res : array, shape (4, n) The result of the product :math:`X^T Y` stored in the banded way. Notes ----- As far as the matrices ``X`` and ``Y`` are 5-banded, their product :math:`X^T W Y` is 7-banded. It is also symmetric, so we can store only unique diagonals. """ # compute W Y W_Y = np.copy(Y) W_Y[2] *= w for i in range(2): W_Y[i, 2 - i:] *= w[:-2 + i] W_Y[3 + i, :-1 - i] *= w[1 + i:] n = X.shape[1] res = np.zeros((4, n)) for i in range(n): for j in range(min(n-i, 4)): res[-j-1, i + j] = sum(X[j:, i] * W_Y[:5-j, i + j]) return res def compute_b_inv(A): """ Inverse 3 central bands of matrix :math:`A=U^T D^{-1} U` assuming that ``U`` is a unit upper triangular banded matrix using an algorithm proposed in [1]. Parameters ---------- A : array, shape (4, n) Matrix to inverse, stored in LAPACK banded storage. Returns ------- B : array, shape (4, n) 3 unique bands of the symmetric matrix that is an inverse to ``A``. The first row is filled with zeros. Notes ----- The algorithm is based on the cholesky decomposition and, therefore, in case matrix ``A`` is close to not positive defined, the function raises LinalgError. Both matrices ``A`` and ``B`` are stored in LAPACK banded storage. References ---------- .. [1] M. F. Hutchinson and F. R. de Hoog, "Smoothing noisy data with spline functions," Numerische Mathematik, vol. 47, no. 1, pp. 99-106, 1985. :doi:`10.1007/BF01389878` """ def find_b_inv_elem(i, j, U, D, B): rng = min(3, n - i - 1) rng_sum = 0. if j == 0: # use 2-nd formula from [1] for k in range(1, rng + 1): rng_sum -= U[-k - 1, i + k] * B[-k - 1, i + k] rng_sum += D[i] B[-1, i] = rng_sum else: # use 1-st formula from [1] for k in range(1, rng + 1): diag = abs(k - j) ind = i + min(k, j) rng_sum -= U[-k - 1, i + k] * B[-diag - 1, ind + diag] B[-j - 1, i + j] = rng_sum U = cholesky_banded(A) for i in range(2, 5): U[-i, i-1:] /= U[-1, :-i+1] D = 1. / (U[-1])**2 U[-1] /= U[-1] n = U.shape[1] B = np.zeros(shape=(4, n)) for i in range(n - 1, -1, -1): for j in range(min(3, n - i - 1), -1, -1): find_b_inv_elem(i, j, U, D, B) # the first row contains garbage and should be removed B[0] = [0.] * n return B def _gcv(lam, X, XtWX, wE, XtE): r""" Computes the generalized cross-validation criteria [1]. Parameters ---------- lam : float, (:math:`\lambda \geq 0`) Regularization parameter. X : array, shape (5, n) Matrix is stored in LAPACK banded storage. XtWX : array, shape (4, n) Product :math:`X^T W X` stored in LAPACK banded storage. wE : array, shape (5, n) Matrix :math:`W^{-1} E` stored in LAPACK banded storage. XtE : array, shape (4, n) Product :math:`X^T E` stored in LAPACK banded storage. Returns ------- res : float Value of the GCV criteria with the regularization parameter :math:`\lambda`. Notes ----- Criteria is computed from the formula (1.3.2) [3]: .. math: GCV(\lambda) = \dfrac{1}{n} \sum\limits_{k = 1}^{n} \dfrac{ \left( y_k - f_{\lambda}(x_k) \right)^2}{\left( 1 - \Tr{A}/n\right)^2}$. The criteria is discussed in section 1.3 [3]. The numerator is computed using (2.2.4) [3] and the denominator is computed using an algorithm from [2] (see in the ``compute_b_inv`` function). References ---------- .. [1] G. Wahba, "Estimating the smoothing parameter" in Spline models for observational data, Philadelphia, Pennsylvania: Society for Industrial and Applied Mathematics, 1990, pp. 45-65. :doi:`10.1137/1.9781611970128` .. [2] M. F. Hutchinson and F. R. de Hoog, "Smoothing noisy data with spline functions," Numerische Mathematik, vol. 47, no. 1, pp. 99-106, 1985. :doi:`10.1007/BF01389878` .. [3] E. Zemlyanoy, "Generalized cross-validation smoothing splines", BSc thesis, 2022. Might be available (in Russian) `here `_ """ # Compute the numerator from (2.2.4) [3] n = X.shape[1] c = solve_banded((2, 2), X + lam * wE, y) res = np.zeros(n) # compute ``W^{-1} E c`` with respect to banded-storage of ``E`` tmp = wE * c for i in range(n): for j in range(max(0, i - n + 3), min(5, i + 3)): res[i] += tmp[j, i + 2 - j] numer = np.linalg.norm(lam * res)**2 / n # compute the denominator lhs = XtWX + lam * XtE try: b_banded = compute_b_inv(lhs) # compute the trace of the product b_banded @ XtX tr = b_banded * XtWX tr[:-1] *= 2 # find the denominator denom = (1 - sum(sum(tr)) / n)**2 except LinAlgError: # cholesky decomposition cannot be performed raise ValueError('Seems like the problem is ill-posed') res = numer / denom return res n = X.shape[1] XtWX = compute_banded_symmetric_XT_W_Y(X, w, X) XtE = compute_banded_symmetric_XT_W_Y(X, w, wE) def fun(lam): return _gcv(lam, X, XtWX, wE, XtE) gcv_est = minimize_scalar(fun, bounds=(0, n), method='Bounded') if gcv_est.success: return gcv_est.x raise ValueError(f"Unable to find minimum of the GCV " f"function: {gcv_est.message}") def _coeff_of_divided_diff(x): """ Returns the coefficients of the divided difference. Parameters ---------- x : array, shape (n,) Array which is used for the computation of divided difference. Returns ------- res : array_like, shape (n,) Coefficients of the divided difference. Notes ----- Vector ``x`` should have unique elements, otherwise an error division by zero might be raised. No checks are performed. """ n = x.shape[0] res = np.zeros(n) for i in range(n): pp = 1. for k in range(n): if k != i: pp *= (x[i] - x[k]) res[i] = 1. / pp return res def make_smoothing_spline(x, y, w=None, lam=None): r""" Compute the (coefficients of) smoothing cubic spline function using ``lam`` to control the tradeoff between the amount of smoothness of the curve and its proximity to the data. In case ``lam`` is None, using the GCV criteria [1] to find it. A smoothing spline is found as a solution to the regularized weighted linear regression problem: .. math:: \sum\limits_{i=1}^n w_i\lvert y_i - f(x_i) \rvert^2 + \lambda\int\limits_{x_1}^{x_n} (f^{(2)}(u))^2 d u where :math:`f` is a spline function, :math:`w` is a vector of weights and :math:`\lambda` is a regularization parameter. If ``lam`` is None, we use the GCV criteria to find an optimal regularization parameter, otherwise we solve the regularized weighted linear regression problem with given parameter. The parameter controls the tradeoff in the following way: the larger the parameter becomes, the smoother the function gets. Parameters ---------- x : array_like, shape (n,) Abscissas. y : array_like, shape (n,) Ordinates. w : array_like, shape (n,), optional Vector of weights. Default is ``np.ones_like(x)``. lam : float, (:math:`\lambda \geq 0`), optional Regularization parameter. If ``lam`` is None, then it is found from the GCV criteria. Default is None. Returns ------- func : a BSpline object. A callable representing a spline in the B-spline basis as a solution of the problem of smoothing splines using the GCV criteria [1] in case ``lam`` is None, otherwise using the given parameter ``lam``. Notes ----- This algorithm is a clean room reimplementation of the algorithm introduced by Woltring in FORTRAN [2]. The original version cannot be used in SciPy source code because of the license issues. The details of the reimplementation are discussed here (available only in Russian) [4]. If the vector of weights ``w`` is None, we assume that all the points are equal in terms of weights, and vector of weights is vector of ones. Note that in weighted residual sum of squares, weights are not squared: :math:`\sum\limits_{i=1}^n w_i\lvert y_i - f(x_i) \rvert^2` while in ``splrep`` the sum is built from the squared weights. In cases when the initial problem is ill-posed (for example, the product :math:`X^T W X` where :math:`X` is a design matrix is not a positive defined matrix) a ValueError is raised. References ---------- .. [1] G. Wahba, "Estimating the smoothing parameter" in Spline models for observational data, Philadelphia, Pennsylvania: Society for Industrial and Applied Mathematics, 1990, pp. 45-65. :doi:`10.1137/1.9781611970128` .. [2] H. J. Woltring, A Fortran package for generalized, cross-validatory spline smoothing and differentiation, Advances in Engineering Software, vol. 8, no. 2, pp. 104-113, 1986. :doi:`10.1016/0141-1195(86)90098-7` .. [3] T. Hastie, J. Friedman, and R. Tisbshirani, "Smoothing Splines" in The elements of Statistical Learning: Data Mining, Inference, and prediction, New York: Springer, 2017, pp. 241-249. :doi:`10.1007/978-0-387-84858-7` .. [4] E. Zemlyanoy, "Generalized cross-validation smoothing splines", BSc thesis, 2022. ``_ (in Russian) Examples -------- Generate some noisy data >>> import numpy as np >>> np.random.seed(1234) >>> n = 200 >>> def func(x): ... return x**3 + x**2 * np.sin(4 * x) >>> x = np.sort(np.random.random_sample(n) * 4 - 2) >>> y = func(x) + np.random.normal(scale=1.5, size=n) Make a smoothing spline function >>> from scipy.interpolate import make_smoothing_spline >>> spl = make_smoothing_spline(x, y) Plot both >>> import matplotlib.pyplot as plt >>> grid = np.linspace(x[0], x[-1], 400) >>> plt.plot(grid, spl(grid), label='Spline') >>> plt.plot(grid, func(grid), label='Original function') >>> plt.scatter(x, y, marker='.') >>> plt.legend(loc='best') >>> plt.show() """ x = np.ascontiguousarray(x, dtype=float) y = np.ascontiguousarray(y, dtype=float) if any(x[1:] - x[:-1] <= 0): raise ValueError('``x`` should be an ascending array') if x.ndim != 1 or y.ndim != 1 or x.shape[0] != y.shape[0]: raise ValueError('``x`` and ``y`` should be one dimensional and the' ' same size') if w is None: w = np.ones(len(x)) else: w = np.ascontiguousarray(w) if any(w <= 0): raise ValueError('Invalid vector of weights') t = np.r_[[x[0]] * 3, x, [x[-1]] * 3] n = x.shape[0] # It is known that the solution to the stated minimization problem exists # and is a natural cubic spline with vector of knots equal to the unique # elements of ``x`` [3], so we will solve the problem in the basis of # natural splines. # create design matrix in the B-spline basis X_bspl = BSpline.design_matrix(x, t, 3) # move from B-spline basis to the basis of natural splines using equations # (2.1.7) [4] # central elements X = np.zeros((5, n)) for i in range(1, 4): X[i, 2: -2] = X_bspl[i: i - 4, 3: -3][np.diag_indices(n - 4)] # first elements X[1, 1] = X_bspl[0, 0] X[2, :2] = ((x[2] + x[1] - 2 * x[0]) * X_bspl[0, 0], X_bspl[1, 1] + X_bspl[1, 2]) X[3, :2] = ((x[2] - x[0]) * X_bspl[1, 1], X_bspl[2, 2]) # last elements X[1, -2:] = (X_bspl[-3, -3], (x[-1] - x[-3]) * X_bspl[-2, -2]) X[2, -2:] = (X_bspl[-2, -3] + X_bspl[-2, -2], (2 * x[-1] - x[-2] - x[-3]) * X_bspl[-1, -1]) X[3, -2] = X_bspl[-1, -1] # create penalty matrix and divide it by vector of weights: W^{-1} E wE = np.zeros((5, n)) wE[2:, 0] = _coeff_of_divided_diff(x[:3]) / w[:3] wE[1:, 1] = _coeff_of_divided_diff(x[:4]) / w[:4] for j in range(2, n - 2): wE[:, j] = (x[j+2] - x[j-2]) * _coeff_of_divided_diff(x[j-2:j+3])\ / w[j-2: j+3] wE[:-1, -2] = -_coeff_of_divided_diff(x[-4:]) / w[-4:] wE[:-2, -1] = _coeff_of_divided_diff(x[-3:]) / w[-3:] wE *= 6 if lam is None: lam = _compute_optimal_gcv_parameter(X, wE, y, w) elif lam < 0.: raise ValueError('Regularization parameter should be non-negative') # solve the initial problem in the basis of natural splines c = solve_banded((2, 2), X + lam * wE, y) # move back to B-spline basis using equations (2.2.10) [4] c_ = np.r_[c[0] * (t[5] + t[4] - 2 * t[3]) + c[1], c[0] * (t[5] - t[3]) + c[1], c[1: -1], c[-1] * (t[-4] - t[-6]) + c[-2], c[-1] * (2 * t[-4] - t[-5] - t[-6]) + c[-2]] return BSpline.construct_fast(t, c_, 3)