projektAI/venv/Lib/site-packages/pandas/util/_validators.py
2021-06-06 22:13:05 +02:00

425 lines
14 KiB
Python

"""
Module that contains many useful utilities
for validating data or function arguments
"""
from typing import Iterable, Sequence, Union
import warnings
import numpy as np
from pandas.core.dtypes.common import is_bool
def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
"""
Checks whether 'args' has length of at most 'compat_args'. Raises
a TypeError if that is not the case, similar to in Python when a
function is called with too many arguments.
"""
if max_fname_arg_count < 0:
raise ValueError("'max_fname_arg_count' must be non-negative")
if len(args) > len(compat_args):
max_arg_count = len(compat_args) + max_fname_arg_count
actual_arg_count = len(args) + max_fname_arg_count
argument = "argument" if max_arg_count == 1 else "arguments"
raise TypeError(
f"{fname}() takes at most {max_arg_count} {argument} "
f"({actual_arg_count} given)"
)
def _check_for_default_values(fname, arg_val_dict, compat_args):
"""
Check that the keys in `arg_val_dict` are mapped to their
default values as specified in `compat_args`.
Note that this function is to be called only when it has been
checked that arg_val_dict.keys() is a subset of compat_args
"""
for key in arg_val_dict:
# try checking equality directly with '=' operator,
# as comparison may have been overridden for the left
# hand object
try:
v1 = arg_val_dict[key]
v2 = compat_args[key]
# check for None-ness otherwise we could end up
# comparing a numpy array vs None
if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
match = False
else:
match = v1 == v2
if not is_bool(match):
raise ValueError("'match' is not a boolean")
# could not compare them directly, so try comparison
# using the 'is' operator
except ValueError:
match = arg_val_dict[key] is compat_args[key]
if not match:
raise ValueError(
f"the '{key}' parameter is not supported in "
f"the pandas implementation of {fname}()"
)
def validate_args(fname, args, max_fname_arg_count, compat_args):
"""
Checks whether the length of the `*args` argument passed into a function
has at most `len(compat_args)` arguments and whether or not all of these
elements in `args` are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `*args` parameter
args : tuple
The `*args` parameter passed into a function
max_fname_arg_count : int
The maximum number of arguments that the function `fname`
can accept, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args : dict
A dictionary of keys and their associated default values.
In order to accommodate buggy behaviour in some versions of `numpy`,
where a signature displayed keyword arguments but then passed those
arguments **positionally** internally when calling downstream
implementations, a dict ensures that the original
order of the keyword arguments is enforced.
Raises
------
TypeError
If `args` contains more values than there are `compat_args`
ValueError
If `args` contains values that do not correspond to those
of the default values specified in `compat_args`
"""
_check_arg_length(fname, args, max_fname_arg_count, compat_args)
# We do this so that we can provide a more informative
# error message about the parameters that we are not
# supporting in the pandas implementation of 'fname'
kwargs = dict(zip(compat_args, args))
_check_for_default_values(fname, kwargs, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args):
"""
Checks whether 'kwargs' contains any keys that are not
in 'compat_args' and raises a TypeError if there is one.
"""
# set(dict) --> set of the dictionary's keys
diff = set(kwargs) - set(compat_args)
if diff:
bad_arg = list(diff)[0]
raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args):
"""
Checks whether parameters passed to the **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `**kwargs` parameter
kwargs : dict
The `**kwargs` parameter passed into `fname`
compat_args: dict
A dictionary of keys that `kwargs` is allowed to have and their
associated default values
Raises
------
TypeError if `kwargs` contains keys not in `compat_args`
ValueError if `kwargs` contains keys in `compat_args` that do not
map to the default values specified in `compat_args`
"""
kwds = kwargs.copy()
_check_for_invalid_keys(fname, kwargs, compat_args)
_check_for_default_values(fname, kwds, compat_args)
def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args):
"""
Checks whether parameters passed to the *args and **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname: str
The name of the function being passed the `**kwargs` parameter
args: tuple
The `*args` parameter passed into a function
kwargs: dict
The `**kwargs` parameter passed into `fname`
max_fname_arg_count: int
The minimum number of arguments that the function `fname`
requires, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args: dict
A dictionary of keys that `kwargs` is allowed to
have and their associated default values.
Raises
------
TypeError if `args` contains more values than there are
`compat_args` OR `kwargs` contains keys not in `compat_args`
ValueError if `args` contains values not at the default value (`None`)
`kwargs` contains keys in `compat_args` that do not map to the default
value as specified in `compat_args`
See Also
--------
validate_args : Purely args validation.
validate_kwargs : Purely kwargs validation.
"""
# Check that the total number of arguments passed in (i.e.
# args and kwargs) does not exceed the length of compat_args
_check_arg_length(
fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
)
# Check there is no overlap with the positional and keyword
# arguments, similar to what is done in actual Python functions
args_dict = dict(zip(compat_args, args))
for key in args_dict:
if key in kwargs:
raise TypeError(
f"{fname}() got multiple values for keyword argument '{key}'"
)
kwargs.update(args_dict)
validate_kwargs(fname, kwargs, compat_args)
def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False):
"""
Ensure that argument passed in arg_name can be interpreted as boolean.
Parameters
----------
value : bool
Value to be validated.
arg_name : str
Name of the argument. To be reflected in the error message.
none_allowed : bool, default True
Whether to consider None to be a valid boolean.
int_allowed : bool, default False
Whether to consider integer value to be a valid boolean.
Returns
-------
value
The same value as input.
Raises
------
ValueError
If the value is not a valid boolean.
"""
good_value = is_bool(value)
if none_allowed:
good_value = good_value or value is None
if int_allowed:
good_value = good_value or isinstance(value, int)
if not good_value:
raise ValueError(
f'For argument "{arg_name}" expected type bool, received '
f"type {type(value).__name__}."
)
return value
def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
"""
Argument handler for mixed index, columns / axis functions
In an attempt to handle both `.method(index, columns)`, and
`.method(arg, axis=.)`, we have to do some bad things to argument
parsing. This translates all arguments to `{index=., columns=.}` style.
Parameters
----------
data : DataFrame
args : tuple
All positional arguments from the user
kwargs : dict
All keyword arguments from the user
arg_name, method_name : str
Used for better error messages
Returns
-------
kwargs : dict
A dictionary of keyword arguments. Doesn't modify ``kwargs``
inplace, so update them with the return value here.
Examples
--------
>>> df._validate_axis_style_args((str.upper,), {'columns': id},
... 'mapper', 'rename')
{'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
This emits a warning
>>> df._validate_axis_style_args((str.upper, id), {},
... 'mapper', 'rename')
{'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
"""
# TODO: Change to keyword-only args and remove all this
out = {}
# Goal: fill 'out' with index/columns-style arguments
# like out = {'index': foo, 'columns': bar}
# Start by validating for consistency
if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER):
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
raise TypeError(msg)
# First fill with explicit values provided by the user...
if arg_name in kwargs:
if args:
msg = f"{method_name} got multiple values for argument '{arg_name}'"
raise TypeError(msg)
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = kwargs[arg_name]
# More user-provided arguments, now from kwargs
for k, v in kwargs.items():
try:
ax = data._get_axis_name(k)
except ValueError:
pass
else:
out[ax] = v
# All user-provided kwargs have been handled now.
# Now we supplement with positional arguments, emitting warnings
# when there's ambiguity and raising when there's conflicts
if len(args) == 0:
pass # It's up to the function to decide if this is valid
elif len(args) == 1:
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = args[0]
elif len(args) == 2:
if "axis" in kwargs:
# Unambiguously wrong
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
raise TypeError(msg)
msg = (
f"Interpreting call\n\t'.{method_name}(a, b)' as "
f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
"arguments to remove any ambiguity. In the future, using "
"positional arguments for 'index' or 'columns' will raise "
"a 'TypeError'."
)
warnings.warn(msg, FutureWarning, stacklevel=4)
out[data._get_axis_name(0)] = args[0]
out[data._get_axis_name(1)] = args[1]
else:
msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
raise TypeError(msg)
return out
def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
"""
Validate the keyword arguments to 'fillna'.
This checks that exactly one of 'value' and 'method' is specified.
If 'method' is specified, this validates that it's a valid method.
Parameters
----------
value, method : object
The 'value' and 'method' keyword arguments for 'fillna'.
validate_scalar_dict_value : bool, default True
Whether to validate that 'value' is a scalar or dict. Specifically,
validate that it is not a list or tuple.
Returns
-------
value, method : object
"""
from pandas.core.missing import clean_fill_method
if value is None and method is None:
raise ValueError("Must specify a fill 'value' or 'method'.")
elif value is None and method is not None:
method = clean_fill_method(method)
elif value is not None and method is None:
if validate_scalar_dict_value and isinstance(value, (list, tuple)):
raise TypeError(
'"value" parameter must be a scalar or dict, but '
f'you passed a "{type(value).__name__}"'
)
elif value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")
return value, method
def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
"""
Validate percentiles (used by describe and quantile).
This function checks if the given float or iterable of floats is a valid percentile
otherwise raises a ValueError.
Parameters
----------
q: float or iterable of floats
A single percentile or an iterable of percentiles.
Returns
-------
ndarray
An ndarray of the percentiles if valid.
Raises
------
ValueError if percentiles are not in given interval([0, 1]).
"""
q_arr = np.asarray(q)
# Don't change this to an f-string. The string formatting
# is too expensive for cases where we don't need it.
msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg.format(q_arr / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q_arr):
raise ValueError(msg.format(q_arr / 100.0))
return q_arr
def validate_ascending(
ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True,
):
"""Validate ``ascending`` kwargs for ``sort_index`` method."""
kwargs = {"none_allowed": False, "int_allowed": True}
if not isinstance(ascending, (list, tuple)):
return validate_bool_kwarg(ascending, "ascending", **kwargs)
return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]