485 lines
11 KiB
Python
485 lines
11 KiB
Python
"""
|
|
Low-level BLAS functions (:mod:`scipy.linalg.blas`)
|
|
===================================================
|
|
|
|
This module contains low-level functions from the BLAS library.
|
|
|
|
.. versionadded:: 0.12.0
|
|
|
|
.. note::
|
|
|
|
The common ``overwrite_<>`` option in many routines, allows the
|
|
input arrays to be overwritten to avoid extra memory allocation.
|
|
However this requires the array to satisfy two conditions
|
|
which are memory order and the data type to match exactly the
|
|
order and the type expected by the routine.
|
|
|
|
As an example, if you pass a double precision float array to any
|
|
``S....`` routine which expects single precision arguments, f2py
|
|
will create an intermediate array to match the argument types and
|
|
overwriting will be performed on that intermediate array.
|
|
|
|
Similarly, if a C-contiguous array is passed, f2py will pass a
|
|
FORTRAN-contiguous array internally. Please make sure that these
|
|
details are satisfied. More information can be found in the f2py
|
|
documentation.
|
|
|
|
.. warning::
|
|
|
|
These functions do little to no error checking.
|
|
It is possible to cause crashes by mis-using them,
|
|
so prefer using the higher-level routines in `scipy.linalg`.
|
|
|
|
Finding functions
|
|
-----------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
get_blas_funcs
|
|
find_best_blas_type
|
|
|
|
BLAS Level 1 functions
|
|
----------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
caxpy
|
|
ccopy
|
|
cdotc
|
|
cdotu
|
|
crotg
|
|
cscal
|
|
csrot
|
|
csscal
|
|
cswap
|
|
dasum
|
|
daxpy
|
|
dcopy
|
|
ddot
|
|
dnrm2
|
|
drot
|
|
drotg
|
|
drotm
|
|
drotmg
|
|
dscal
|
|
dswap
|
|
dzasum
|
|
dznrm2
|
|
icamax
|
|
idamax
|
|
isamax
|
|
izamax
|
|
sasum
|
|
saxpy
|
|
scasum
|
|
scnrm2
|
|
scopy
|
|
sdot
|
|
snrm2
|
|
srot
|
|
srotg
|
|
srotm
|
|
srotmg
|
|
sscal
|
|
sswap
|
|
zaxpy
|
|
zcopy
|
|
zdotc
|
|
zdotu
|
|
zdrot
|
|
zdscal
|
|
zrotg
|
|
zscal
|
|
zswap
|
|
|
|
BLAS Level 2 functions
|
|
----------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
sgbmv
|
|
sgemv
|
|
sger
|
|
ssbmv
|
|
sspr
|
|
sspr2
|
|
ssymv
|
|
ssyr
|
|
ssyr2
|
|
stbmv
|
|
stpsv
|
|
strmv
|
|
strsv
|
|
dgbmv
|
|
dgemv
|
|
dger
|
|
dsbmv
|
|
dspr
|
|
dspr2
|
|
dsymv
|
|
dsyr
|
|
dsyr2
|
|
dtbmv
|
|
dtpsv
|
|
dtrmv
|
|
dtrsv
|
|
cgbmv
|
|
cgemv
|
|
cgerc
|
|
cgeru
|
|
chbmv
|
|
chemv
|
|
cher
|
|
cher2
|
|
chpmv
|
|
chpr
|
|
chpr2
|
|
ctbmv
|
|
ctbsv
|
|
ctpmv
|
|
ctpsv
|
|
ctrmv
|
|
ctrsv
|
|
csyr
|
|
zgbmv
|
|
zgemv
|
|
zgerc
|
|
zgeru
|
|
zhbmv
|
|
zhemv
|
|
zher
|
|
zher2
|
|
zhpmv
|
|
zhpr
|
|
zhpr2
|
|
ztbmv
|
|
ztbsv
|
|
ztpmv
|
|
ztrmv
|
|
ztrsv
|
|
zsyr
|
|
|
|
BLAS Level 3 functions
|
|
----------------------
|
|
|
|
.. autosummary::
|
|
:toctree: generated/
|
|
|
|
sgemm
|
|
ssymm
|
|
ssyr2k
|
|
ssyrk
|
|
strmm
|
|
strsm
|
|
dgemm
|
|
dsymm
|
|
dsyr2k
|
|
dsyrk
|
|
dtrmm
|
|
dtrsm
|
|
cgemm
|
|
chemm
|
|
cher2k
|
|
cherk
|
|
csymm
|
|
csyr2k
|
|
csyrk
|
|
ctrmm
|
|
ctrsm
|
|
zgemm
|
|
zhemm
|
|
zher2k
|
|
zherk
|
|
zsymm
|
|
zsyr2k
|
|
zsyrk
|
|
ztrmm
|
|
ztrsm
|
|
|
|
"""
|
|
#
|
|
# Author: Pearu Peterson, March 2002
|
|
# refactoring by Fabian Pedregosa, March 2010
|
|
#
|
|
|
|
__all__ = ['get_blas_funcs', 'find_best_blas_type']
|
|
|
|
import numpy as _np
|
|
import functools
|
|
|
|
from scipy.linalg import _fblas
|
|
try:
|
|
from scipy.linalg import _cblas
|
|
except ImportError:
|
|
_cblas = None
|
|
|
|
try:
|
|
from scipy.linalg import _fblas_64
|
|
HAS_ILP64 = True
|
|
except ImportError:
|
|
HAS_ILP64 = False
|
|
_fblas_64 = None
|
|
|
|
# Expose all functions (only fblas --- cblas is an implementation detail)
|
|
empty_module = None
|
|
from scipy.linalg._fblas import * # noqa: E402, F403
|
|
del empty_module
|
|
|
|
# all numeric dtypes '?bBhHiIlLqQefdgFDGO' that are safe to be converted to
|
|
|
|
# single precision float : '?bBhH!!!!!!ef!!!!!!'
|
|
# double precision float : '?bBhHiIlLqQefdg!!!!'
|
|
# single precision complex : '?bBhH!!!!!!ef!!F!!!'
|
|
# double precision complex : '?bBhHiIlLqQefdgFDG!'
|
|
|
|
_type_score = {x: 1 for x in '?bBhHef'}
|
|
_type_score.update({x: 2 for x in 'iIlLqQd'})
|
|
|
|
# Handle float128(g) and complex256(G) separately in case non-Windows systems.
|
|
# On Windows, the values will be rewritten to the same key with the same value.
|
|
_type_score.update({'F': 3, 'D': 4, 'g': 2, 'G': 4})
|
|
|
|
# Final mapping to the actual prefixes and dtypes
|
|
_type_conv = {1: ('s', _np.dtype('float32')),
|
|
2: ('d', _np.dtype('float64')),
|
|
3: ('c', _np.dtype('complex64')),
|
|
4: ('z', _np.dtype('complex128'))}
|
|
|
|
# some convenience alias for complex functions
|
|
_blas_alias = {'cnrm2': 'scnrm2', 'znrm2': 'dznrm2',
|
|
'cdot': 'cdotc', 'zdot': 'zdotc',
|
|
'cger': 'cgerc', 'zger': 'zgerc',
|
|
'sdotc': 'sdot', 'sdotu': 'sdot',
|
|
'ddotc': 'ddot', 'ddotu': 'ddot'}
|
|
|
|
|
|
def find_best_blas_type(arrays=(), dtype=None):
|
|
"""Find best-matching BLAS/LAPACK type.
|
|
|
|
Arrays are used to determine the optimal prefix of BLAS routines.
|
|
|
|
Parameters
|
|
----------
|
|
arrays : sequence of ndarrays, optional
|
|
Arrays can be given to determine optimal prefix of BLAS
|
|
routines. If not given, double-precision routines will be
|
|
used, otherwise the most generic type in arrays will be used.
|
|
dtype : str or dtype, optional
|
|
Data-type specifier. Not used if `arrays` is non-empty.
|
|
|
|
Returns
|
|
-------
|
|
prefix : str
|
|
BLAS/LAPACK prefix character.
|
|
dtype : dtype
|
|
Inferred Numpy data type.
|
|
prefer_fortran : bool
|
|
Whether to prefer Fortran order routines over C order.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> import scipy.linalg.blas as bla
|
|
>>> rng = np.random.default_rng()
|
|
>>> a = rng.random((10,15))
|
|
>>> b = np.asfortranarray(a) # Change the memory layout order
|
|
>>> bla.find_best_blas_type((a,))
|
|
('d', dtype('float64'), False)
|
|
>>> bla.find_best_blas_type((a*1j,))
|
|
('z', dtype('complex128'), False)
|
|
>>> bla.find_best_blas_type((b,))
|
|
('d', dtype('float64'), True)
|
|
|
|
"""
|
|
dtype = _np.dtype(dtype)
|
|
max_score = _type_score.get(dtype.char, 5)
|
|
prefer_fortran = False
|
|
|
|
if arrays:
|
|
# In most cases, single element is passed through, quicker route
|
|
if len(arrays) == 1:
|
|
max_score = _type_score.get(arrays[0].dtype.char, 5)
|
|
prefer_fortran = arrays[0].flags['FORTRAN']
|
|
else:
|
|
# use the most generic type in arrays
|
|
scores = [_type_score.get(x.dtype.char, 5) for x in arrays]
|
|
max_score = max(scores)
|
|
ind_max_score = scores.index(max_score)
|
|
# safe upcasting for mix of float64 and complex64 --> prefix 'z'
|
|
if max_score == 3 and (2 in scores):
|
|
max_score = 4
|
|
|
|
if arrays[ind_max_score].flags['FORTRAN']:
|
|
# prefer Fortran for leading array with column major order
|
|
prefer_fortran = True
|
|
|
|
# Get the LAPACK prefix and the corresponding dtype if not fall back
|
|
# to 'd' and double precision float.
|
|
prefix, dtype = _type_conv.get(max_score, ('d', _np.dtype('float64')))
|
|
|
|
return prefix, dtype, prefer_fortran
|
|
|
|
|
|
def _get_funcs(names, arrays, dtype,
|
|
lib_name, fmodule, cmodule,
|
|
fmodule_name, cmodule_name, alias,
|
|
ilp64=False):
|
|
"""
|
|
Return available BLAS/LAPACK functions.
|
|
|
|
Used also in lapack.py. See get_blas_funcs for docstring.
|
|
"""
|
|
|
|
funcs = []
|
|
unpack = False
|
|
dtype = _np.dtype(dtype)
|
|
module1 = (cmodule, cmodule_name)
|
|
module2 = (fmodule, fmodule_name)
|
|
|
|
if isinstance(names, str):
|
|
names = (names,)
|
|
unpack = True
|
|
|
|
prefix, dtype, prefer_fortran = find_best_blas_type(arrays, dtype)
|
|
|
|
if prefer_fortran:
|
|
module1, module2 = module2, module1
|
|
|
|
for name in names:
|
|
func_name = prefix + name
|
|
func_name = alias.get(func_name, func_name)
|
|
func = getattr(module1[0], func_name, None)
|
|
module_name = module1[1]
|
|
if func is None:
|
|
func = getattr(module2[0], func_name, None)
|
|
module_name = module2[1]
|
|
if func is None:
|
|
raise ValueError(
|
|
f'{lib_name} function {func_name} could not be found')
|
|
func.module_name, func.typecode = module_name, prefix
|
|
func.dtype = dtype
|
|
if not ilp64:
|
|
func.int_dtype = _np.dtype(_np.intc)
|
|
else:
|
|
func.int_dtype = _np.dtype(_np.int64)
|
|
func.prefix = prefix # Backward compatibility
|
|
funcs.append(func)
|
|
|
|
if unpack:
|
|
return funcs[0]
|
|
else:
|
|
return funcs
|
|
|
|
|
|
def _memoize_get_funcs(func):
|
|
"""
|
|
Memoized fast path for _get_funcs instances
|
|
"""
|
|
memo = {}
|
|
func.memo = memo
|
|
|
|
@functools.wraps(func)
|
|
def getter(names, arrays=(), dtype=None, ilp64=False):
|
|
key = (names, dtype, ilp64)
|
|
for array in arrays:
|
|
# cf. find_blas_funcs
|
|
key += (array.dtype.char, array.flags.fortran)
|
|
|
|
try:
|
|
value = memo.get(key)
|
|
except TypeError:
|
|
# unhashable key etc.
|
|
key = None
|
|
value = None
|
|
|
|
if value is not None:
|
|
return value
|
|
|
|
value = func(names, arrays, dtype, ilp64)
|
|
|
|
if key is not None:
|
|
memo[key] = value
|
|
|
|
return value
|
|
|
|
return getter
|
|
|
|
|
|
@_memoize_get_funcs
|
|
def get_blas_funcs(names, arrays=(), dtype=None, ilp64=False):
|
|
"""Return available BLAS function objects from names.
|
|
|
|
Arrays are used to determine the optimal prefix of BLAS routines.
|
|
|
|
Parameters
|
|
----------
|
|
names : str or sequence of str
|
|
Name(s) of BLAS functions without type prefix.
|
|
|
|
arrays : sequence of ndarrays, optional
|
|
Arrays can be given to determine optimal prefix of BLAS
|
|
routines. If not given, double-precision routines will be
|
|
used, otherwise the most generic type in arrays will be used.
|
|
|
|
dtype : str or dtype, optional
|
|
Data-type specifier. Not used if `arrays` is non-empty.
|
|
|
|
ilp64 : {True, False, 'preferred'}, optional
|
|
Whether to return ILP64 routine variant.
|
|
Choosing 'preferred' returns ILP64 routine if available,
|
|
and otherwise the 32-bit routine. Default: False
|
|
|
|
Returns
|
|
-------
|
|
funcs : list
|
|
List containing the found function(s).
|
|
|
|
|
|
Notes
|
|
-----
|
|
This routine automatically chooses between Fortran/C
|
|
interfaces. Fortran code is used whenever possible for arrays with
|
|
column major order. In all other cases, C code is preferred.
|
|
|
|
In BLAS, the naming convention is that all functions start with a
|
|
type prefix, which depends on the type of the principal
|
|
matrix. These can be one of {'s', 'd', 'c', 'z'} for the NumPy
|
|
types {float32, float64, complex64, complex128} respectively.
|
|
The code and the dtype are stored in attributes `typecode` and `dtype`
|
|
of the returned functions.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> import scipy.linalg as LA
|
|
>>> rng = np.random.default_rng()
|
|
>>> a = rng.random((3,2))
|
|
>>> x_gemv = LA.get_blas_funcs('gemv', (a,))
|
|
>>> x_gemv.typecode
|
|
'd'
|
|
>>> x_gemv = LA.get_blas_funcs('gemv',(a*1j,))
|
|
>>> x_gemv.typecode
|
|
'z'
|
|
|
|
"""
|
|
if isinstance(ilp64, str):
|
|
if ilp64 == 'preferred':
|
|
ilp64 = HAS_ILP64
|
|
else:
|
|
raise ValueError("Invalid value for 'ilp64'")
|
|
|
|
if not ilp64:
|
|
return _get_funcs(names, arrays, dtype,
|
|
"BLAS", _fblas, _cblas, "fblas", "cblas",
|
|
_blas_alias, ilp64=False)
|
|
else:
|
|
if not HAS_ILP64:
|
|
raise RuntimeError("BLAS ILP64 routine requested, but Scipy "
|
|
"compiled only with 32-bit BLAS")
|
|
return _get_funcs(names, arrays, dtype,
|
|
"BLAS", _fblas_64, None, "fblas_64", None,
|
|
_blas_alias, ilp64=True)
|